Background

Three Card Poker is a simple game in which the player and the dealer each receive 3 cards from a standard 52-card deck. Hand ranks from high to low are:

Of note, a straight is better than a flush in Three Card Poker. The player makes an Ante wager and then looks at their cards. The player may either make a Play wager equal to their Ante or may fold and lose only their Ante. If the player makes the Play bet, the dealer reveals their cards, with bets resolved as follows:

In addition, the player receives an “Ante Bonus” for holding a very strong hand, regardless of whether the dealer qualifies (Q32 or better) or even beats the player. Typically, the Ante bonus is 1 for Straight, 4 for Three-of-a-Kind, and 5 for Straight Flush

This file explores hands and outcomes in Three Card Poker. Tidyverse and several custom functions are used throughout:

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
source("./Generic_Added_Utility_Functions_202105_v001.R")

Analysis

All possible combinations of 52 cards are gathered in a matrix:

# All combinations of 3 cards from a 52-card deck
aHands <- combn(1:52, 3) %>% t()
str(aHands)
##  int [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
# Suits (1-13 are 1, 14-26 are 2, 27-39 are 3, 40-52 are 4)
aSuits <- 1 + (aHands-1) %/% 13
str(aSuits)
##  num [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
table(aSuits)
## aSuits
##     1     2     3     4 
## 16575 16575 16575 16575
# Ranks (1/14/27/40 are A stored as 1, ... , 13/26/39/52 are K stored as 13)
aRanks <- 1 + (aHands-1) %% 13
str(aRanks)
##  num [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
table(aRanks)
## aRanks
##    1    2    3    4    5    6    7    8    9   10   11   12   13 
## 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100

The combinations of suits (and therefore flushes) are explored:

tblSuits <- as.data.frame(sapply(1:4, 
                                 FUN=function(x) apply(aSuits[,], 
                                                       1, 
                                                       FUN=function(y) sum(y==x)
                                                       )
                                 )
                          ) %>% 
    tibble::as_tibble() %>%
    mutate(nMax=apply(., 1, FUN=function(x) max(x)), 
           isFlush=(nMax==3), 
           handNum=row_number()
           ) %>%
    select(handNum, everything())

# Sample of tibble
tblSuits
## # A tibble: 22,100 × 7
##    handNum    V1    V2    V3    V4  nMax isFlush
##      <int> <int> <int> <int> <int> <int> <lgl>  
##  1       1     3     0     0     0     3 TRUE   
##  2       2     3     0     0     0     3 TRUE   
##  3       3     3     0     0     0     3 TRUE   
##  4       4     3     0     0     0     3 TRUE   
##  5       5     3     0     0     0     3 TRUE   
##  6       6     3     0     0     0     3 TRUE   
##  7       7     3     0     0     0     3 TRUE   
##  8       8     3     0     0     0     3 TRUE   
##  9       9     3     0     0     0     3 TRUE   
## 10      10     3     0     0     0     3 TRUE   
## # ℹ 22,090 more rows
# Counts of suit distributions
tblSuits %>% 
    count(nMax, isFlush, V1, V2, V3, V4)
## # A tibble: 20 × 7
##     nMax isFlush    V1    V2    V3    V4     n
##    <int> <lgl>   <int> <int> <int> <int> <int>
##  1     1 FALSE       0     1     1     1  2197
##  2     1 FALSE       1     0     1     1  2197
##  3     1 FALSE       1     1     0     1  2197
##  4     1 FALSE       1     1     1     0  2197
##  5     2 FALSE       0     0     1     2  1014
##  6     2 FALSE       0     0     2     1  1014
##  7     2 FALSE       0     1     0     2  1014
##  8     2 FALSE       0     1     2     0  1014
##  9     2 FALSE       0     2     0     1  1014
## 10     2 FALSE       0     2     1     0  1014
## 11     2 FALSE       1     0     0     2  1014
## 12     2 FALSE       1     0     2     0  1014
## 13     2 FALSE       1     2     0     0  1014
## 14     2 FALSE       2     0     0     1  1014
## 15     2 FALSE       2     0     1     0  1014
## 16     2 FALSE       2     1     0     0  1014
## 17     3 TRUE        0     0     0     3   286
## 18     3 TRUE        0     0     3     0   286
## 19     3 TRUE        0     3     0     0   286
## 20     3 TRUE        3     0     0     0   286

The combinations of ranks (and therefore trips and straights and pairs) is explored, and combined with the suits data to create overall hand types:

tblRanks <- as.data.frame(sapply(1:13, 
                                 FUN=function(x) apply(aRanks[,], 
                                                       1, 
                                                       FUN=function(y) sum(y==x)
                                                       )
                                 )
                          ) %>% 
    tibble::as_tibble() %>%
    mutate(nMax=apply(., 1, FUN=function(x) max(x)), 
           isTrips=(nMax==3),
           isPair=(nMax==2),
           handNum=row_number()
           ) %>%
    select(handNum, everything())

# Matrix of ranks that constitute a straight
mtxStrRanks <- matrix(data=0L, nrow=13, ncol=12)
for(idx in 1:11) mtxStrRanks[idx:(idx+2), idx] <- 1L
mtxStrRanks[c(1, 12, 13), 12] <- 1L
mtxStrRanks
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
##  [1,]    1    0    0    0    0    0    0    0    0     0     0     1
##  [2,]    1    1    0    0    0    0    0    0    0     0     0     0
##  [3,]    1    1    1    0    0    0    0    0    0     0     0     0
##  [4,]    0    1    1    1    0    0    0    0    0     0     0     0
##  [5,]    0    0    1    1    1    0    0    0    0     0     0     0
##  [6,]    0    0    0    1    1    1    0    0    0     0     0     0
##  [7,]    0    0    0    0    1    1    1    0    0     0     0     0
##  [8,]    0    0    0    0    0    1    1    1    0     0     0     0
##  [9,]    0    0    0    0    0    0    1    1    1     0     0     0
## [10,]    0    0    0    0    0    0    0    1    1     1     0     0
## [11,]    0    0    0    0    0    0    0    0    1     1     1     0
## [12,]    0    0    0    0    0    0    0    0    0     1     1     1
## [13,]    0    0    0    0    0    0    0    0    0     0     1     1
# tblRanks converted to matrix of yes/no by rank
mtxBoolRanks <- tblRanks %>%
    select(V1:V13) %>%
    mutate(across(.cols=everything(), .fns=function(x) pmin(1L, x))) %>%
    as.matrix()
str(mtxBoolRanks)
##  int [1:22100, 1:13] 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:13] "V1" "V2" "V3" "V4" ...
# Check for each type of straight
tblRanks <- tblRanks %>%
    mutate(maxStr=mtxBoolRanks %*% mtxStrRanks %>% apply(., 1, FUN=max), 
           isStraight=(maxStr==3)
           )

# Example of tibble
tblRanks
## # A tibble: 22,100 × 19
##    handNum    V1    V2    V3    V4    V5    V6    V7    V8    V9   V10   V11
##      <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
##  1       1     1     1     1     0     0     0     0     0     0     0     0
##  2       2     1     1     0     1     0     0     0     0     0     0     0
##  3       3     1     1     0     0     1     0     0     0     0     0     0
##  4       4     1     1     0     0     0     1     0     0     0     0     0
##  5       5     1     1     0     0     0     0     1     0     0     0     0
##  6       6     1     1     0     0     0     0     0     1     0     0     0
##  7       7     1     1     0     0     0     0     0     0     1     0     0
##  8       8     1     1     0     0     0     0     0     0     0     1     0
##  9       9     1     1     0     0     0     0     0     0     0     0     1
## 10      10     1     1     0     0     0     0     0     0     0     0     0
## # ℹ 22,090 more rows
## # ℹ 7 more variables: V12 <int>, V13 <int>, nMax <int>, isTrips <lgl>,
## #   isPair <lgl>, maxStr <dbl>, isStraight <lgl>
# Counts of hand types based on rank
tblRanks %>% 
    count(nMax, isTrips, isPair, maxStr, isStraight)
## # A tibble: 6 × 6
##    nMax isTrips isPair maxStr isStraight     n
##   <int> <lgl>   <lgl>   <dbl> <lgl>      <int>
## 1     1 FALSE   FALSE       1 FALSE       4544
## 2     1 FALSE   FALSE       2 FALSE      12992
## 3     1 FALSE   FALSE       3 TRUE         768
## 4     2 FALSE   TRUE        1 FALSE       2544
## 5     2 FALSE   TRUE        2 FALSE       1200
## 6     3 TRUE    FALSE       1 FALSE         52
# Counts by straight
tblRanks %>%
    filter(isStraight) %>%
    group_by(across(starts_with("V"))) %>%
    summarize(n=n(), .groups="drop")
## # A tibble: 12 × 14
##       V1    V2    V3    V4    V5    V6    V7    V8    V9   V10   V11   V12   V13
##    <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
##  1     0     0     0     0     0     0     0     0     0     0     1     1     1
##  2     0     0     0     0     0     0     0     0     0     1     1     1     0
##  3     0     0     0     0     0     0     0     0     1     1     1     0     0
##  4     0     0     0     0     0     0     0     1     1     1     0     0     0
##  5     0     0     0     0     0     0     1     1     1     0     0     0     0
##  6     0     0     0     0     0     1     1     1     0     0     0     0     0
##  7     0     0     0     0     1     1     1     0     0     0     0     0     0
##  8     0     0     0     1     1     1     0     0     0     0     0     0     0
##  9     0     0     1     1     1     0     0     0     0     0     0     0     0
## 10     0     1     1     1     0     0     0     0     0     0     0     0     0
## 11     1     0     0     0     0     0     0     0     0     0     0     1     1
## 12     1     1     1     0     0     0     0     0     0     0     0     0     0
## # ℹ 1 more variable: n <int>
# Create handTypes
tblHandTypes <- tblRanks %>%
    select(handNum, isTrips, isPair, isStraight) %>%
    full_join(select(tblSuits, handNum, isFlush), by="handNum") %>%
    mutate(handType=case_when(isStraight & isFlush ~ "01. SF", 
                              isTrips ~ "02. Trips", 
                              isStraight ~ "03. Straight", 
                              isFlush ~ "04. Flush", 
                              isPair ~ "05. Pair", 
                              TRUE ~ "06. High Card"
                              )
           )
tblHandTypes
## # A tibble: 22,100 × 6
##    handNum isTrips isPair isStraight isFlush handType 
##      <int> <lgl>   <lgl>  <lgl>      <lgl>   <chr>    
##  1       1 FALSE   FALSE  TRUE       TRUE    01. SF   
##  2       2 FALSE   FALSE  FALSE      TRUE    04. Flush
##  3       3 FALSE   FALSE  FALSE      TRUE    04. Flush
##  4       4 FALSE   FALSE  FALSE      TRUE    04. Flush
##  5       5 FALSE   FALSE  FALSE      TRUE    04. Flush
##  6       6 FALSE   FALSE  FALSE      TRUE    04. Flush
##  7       7 FALSE   FALSE  FALSE      TRUE    04. Flush
##  8       8 FALSE   FALSE  FALSE      TRUE    04. Flush
##  9       9 FALSE   FALSE  FALSE      TRUE    04. Flush
## 10      10 FALSE   FALSE  FALSE      TRUE    04. Flush
## # ℹ 22,090 more rows
tblHandTypes %>%
    count(handType, isTrips, isStraight, isFlush, isPair) %>%
    arrange(n) %>%
    mutate(pct=n/sum(n))
## # A tibble: 6 × 7
##   handType      isTrips isStraight isFlush isPair     n     pct
##   <chr>         <lgl>   <lgl>      <lgl>   <lgl>  <int>   <dbl>
## 1 01. SF        FALSE   TRUE       TRUE    FALSE     48 0.00217
## 2 02. Trips     TRUE    FALSE      FALSE   FALSE     52 0.00235
## 3 03. Straight  FALSE   TRUE       FALSE   FALSE    720 0.0326 
## 4 04. Flush     FALSE   FALSE      TRUE    FALSE   1096 0.0496 
## 5 05. Pair      FALSE   FALSE      FALSE   TRUE    3744 0.169  
## 6 06. High Card FALSE   FALSE      FALSE   FALSE  16440 0.744

Hands by type are given a ranking:

# Ranks database
tblHandRanks <- tblHandTypes %>%
    select(handNum, handType) %>%
    full_join(select(tblRanks, handNum, V1:V13)) %>%
    pivot_longer(cols=-c(handNum, handType)) %>%
    filter(value>0) %>%
    mutate(card=as.integer(str_replace(name, "V", "")), 
           card=ifelse(card==1, 14, card)
           ) %>%
    arrange(handNum, desc(value), desc(card))
## Joining with `by = join_by(handNum)`
tblHandRanks
## # A tibble: 62,452 × 5
##    handNum handType  name  value  card
##      <int> <chr>     <chr> <int> <dbl>
##  1       1 01. SF    V1        1    14
##  2       1 01. SF    V3        1     3
##  3       1 01. SF    V2        1     2
##  4       2 04. Flush V1        1    14
##  5       2 04. Flush V4        1     4
##  6       2 04. Flush V2        1     2
##  7       3 04. Flush V1        1    14
##  8       3 04. Flush V5        1     5
##  9       3 04. Flush V2        1     2
## 10       4 04. Flush V1        1    14
## # ℹ 62,442 more rows
# Hands without straights can be ranked directly
rnkNonStraight <- tblHandRanks %>%
    group_by(handNum) %>%
    mutate(rn=paste0("C", row_number())) %>%
    filter(!(handType %in% c("01. SF", "03. Straight"))) %>%
    pivot_wider(id_cols=c(handNum, handType), 
                names_from="rn", 
                values_from="card", 
                values_fill=0
                ) %>%
    ungroup()
rnkNonStraight
## # A tibble: 21,332 × 5
##    handNum handType     C1    C2    C3
##      <int> <chr>     <dbl> <dbl> <dbl>
##  1       2 04. Flush    14     4     2
##  2       3 04. Flush    14     5     2
##  3       4 04. Flush    14     6     2
##  4       5 04. Flush    14     7     2
##  5       6 04. Flush    14     8     2
##  6       7 04. Flush    14     9     2
##  7       8 04. Flush    14    10     2
##  8       9 04. Flush    14    11     2
##  9      10 04. Flush    14    12     2
## 10      11 04. Flush    14    13     2
## # ℹ 21,322 more rows
# Ranks within type for non-straights
rnkNonStraight <- rnkNonStraight %>%
    count(handType, C1, C2, C3, name="nSameRank") %>%
    arrange(handType, desc(C1), desc(C2), desc(C3)) %>%
    group_by(handType) %>%
    mutate(rankOfType=row_number()) %>%
    ungroup() %>%
    inner_join(rnkNonStraight, by=c("handType", "C1", "C2", "C3"))
rnkNonStraight
## # A tibble: 21,332 × 7
##    handType     C1    C2    C3 nSameRank rankOfType handNum
##    <chr>     <dbl> <dbl> <dbl>     <int>      <int>   <int>
##  1 02. Trips    14     0     0         4          1     547
##  2 02. Trips    14     0     0         4          1     560
##  3 02. Trips    14     0     0         4          1     963
##  4 02. Trips    14     0     0         4          1   13352
##  5 02. Trips    13     0     0         4          2   12623
##  6 02. Trips    13     0     0         4          2   12636
##  7 02. Trips    13     0     0         4          2   12883
##  8 02. Trips    13     0     0         4          2   19422
##  9 02. Trips    12     0     0         4          3   11855
## 10 02. Trips    12     0     0         4          3   11868
## # ℹ 21,322 more rows
# Ranks for hands with straights
rnkStraight <- tblHandRanks %>%
    group_by(handNum) %>%
    mutate(rn=paste0("C", row_number())) %>%
    filter(handType %in% c("01. SF", "03. Straight")) %>%
    pivot_wider(id_cols=c(handNum, handType), 
                names_from="rn", 
                values_from="card", 
                values_fill=0
                ) %>%
    ungroup() %>%
    mutate(C1=ifelse(C1==14 & C2==3, 1, C1))
rnkStraight
## # A tibble: 768 × 5
##    handNum handType        C1    C2    C3
##      <int> <chr>        <dbl> <dbl> <dbl>
##  1       1 01. SF           1     3     2
##  2      14 03. Straight     1     3     2
##  3      27 03. Straight     1     3     2
##  4      40 03. Straight     1     3     2
##  5      62 03. Straight     1     3     2
##  6      75 03. Straight     1     3     2
##  7      88 03. Straight     1     3     2
##  8     456 01. SF          14    13    12
##  9     469 03. Straight    14    13    12
## 10     482 03. Straight    14    13    12
## # ℹ 758 more rows
# Ranks within type for straights
rnkStraight <- rnkStraight %>%
    count(handType, C1, C2, C3, name="nSameRank") %>%
    arrange(handType, desc(C1), desc(C2), desc(C3)) %>%
    group_by(handType) %>%
    mutate(rankOfType=row_number()) %>%
    ungroup() %>%
    inner_join(rnkStraight, by=c("handType", "C1", "C2", "C3"))
rnkStraight
## # A tibble: 768 × 7
##    handType    C1    C2    C3 nSameRank rankOfType handNum
##    <chr>    <dbl> <dbl> <dbl>     <int>      <int>   <int>
##  1 01. SF      14    13    12         4          1     456
##  2 01. SF      14    13    12         4          1   13287
##  3 01. SF      14    13    12         4          1   19696
##  4 01. SF      14    13    12         4          1   21880
##  5 01. SF      13    12    11         4          2   10621
##  6 01. SF      13    12    11         4          2   18447
##  7 01. SF      13    12    11         4          2   21541
##  8 01. SF      13    12    11         4          2   22100
##  9 01. SF      12    11    10         4          3    9760
## 10 01. SF      12    11    10         4          3   18041
## # ℹ 758 more rows
# Full hand ranks
rnkAllHands <- bind_rows(rnkStraight, rnkNonStraight) %>%
    arrange(handType, rankOfType) %>%
    mutate(chgType=handType!=lag(handType), 
           chgRank=rankOfType!=lag(rankOfType), 
           chgVal=ifelse(row_number()==1, 1, ifelse(chgType|chgRank, 1, 0)), 
           rankOverall=cumsum(chgVal)
           )
rnkAllHands
## # A tibble: 22,100 × 11
##    handType    C1    C2    C3 nSameRank rankOfType handNum chgType chgRank
##    <chr>    <dbl> <dbl> <dbl>     <int>      <int>   <int> <lgl>   <lgl>  
##  1 01. SF      14    13    12         4          1     456 NA      NA     
##  2 01. SF      14    13    12         4          1   13287 FALSE   FALSE  
##  3 01. SF      14    13    12         4          1   19696 FALSE   FALSE  
##  4 01. SF      14    13    12         4          1   21880 FALSE   FALSE  
##  5 01. SF      13    12    11         4          2   10621 FALSE   TRUE   
##  6 01. SF      13    12    11         4          2   18447 FALSE   FALSE  
##  7 01. SF      13    12    11         4          2   21541 FALSE   FALSE  
##  8 01. SF      13    12    11         4          2   22100 FALSE   FALSE  
##  9 01. SF      12    11    10         4          3    9760 FALSE   TRUE   
## 10 01. SF      12    11    10         4          3   18041 FALSE   FALSE  
## # ℹ 22,090 more rows
## # ℹ 2 more variables: chgVal <dbl>, rankOverall <dbl>
rnkAllHands %>% count(rankOverall, name="nRankOverall")
## # A tibble: 741 × 2
##    rankOverall nRankOverall
##          <dbl>        <int>
##  1           1            4
##  2           2            4
##  3           3            4
##  4           4            4
##  5           5            4
##  6           6            4
##  7           7            4
##  8           8            4
##  9           9            4
## 10          10            4
## # ℹ 731 more rows
rnkAllHands %>% 
    count(rankOverall, handType, name="nRankOverall") %>% 
    count(handType, nRankOverall)
## # A tibble: 6 × 3
##   handType      nRankOverall     n
##   <chr>                <int> <int>
## 1 01. SF                   4    12
## 2 02. Trips                4    13
## 3 03. Straight            60    12
## 4 04. Flush                4   274
## 5 05. Pair                24   156
## 6 06. High Card           60   274

A list of hand numbers containing each card (1-52) is created, as well as a function to generate excluded hand numbers:

cardInHand <- lapply(1:52, FUN=function(x) which(rowSums(aHands==x)==1))
str(cardInHand)
## List of 52
##  $ : int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
##  $ : int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
##  $ : int [1:1275] 1 51 52 53 54 55 56 57 58 59 ...
##  $ : int [1:1275] 2 51 100 101 102 103 104 105 106 107 ...
##  $ : int [1:1275] 3 52 100 148 149 150 151 152 153 154 ...
##  $ : int [1:1275] 4 53 101 148 195 196 197 198 199 200 ...
##  $ : int [1:1275] 5 54 102 149 195 241 242 243 244 245 ...
##  $ : int [1:1275] 6 55 103 150 196 241 286 287 288 289 ...
##  $ : int [1:1275] 7 56 104 151 197 242 286 330 331 332 ...
##  $ : int [1:1275] 8 57 105 152 198 243 287 330 373 374 ...
##  $ : int [1:1275] 9 58 106 153 199 244 288 331 373 415 ...
##  $ : int [1:1275] 10 59 107 154 200 245 289 332 374 415 ...
##  $ : int [1:1275] 11 60 108 155 201 246 290 333 375 416 ...
##  $ : int [1:1275] 12 61 109 156 202 247 291 334 376 417 ...
##  $ : int [1:1275] 13 62 110 157 203 248 292 335 377 418 ...
##  $ : int [1:1275] 14 63 111 158 204 249 293 336 378 419 ...
##  $ : int [1:1275] 15 64 112 159 205 250 294 337 379 420 ...
##  $ : int [1:1275] 16 65 113 160 206 251 295 338 380 421 ...
##  $ : int [1:1275] 17 66 114 161 207 252 296 339 381 422 ...
##  $ : int [1:1275] 18 67 115 162 208 253 297 340 382 423 ...
##  $ : int [1:1275] 19 68 116 163 209 254 298 341 383 424 ...
##  $ : int [1:1275] 20 69 117 164 210 255 299 342 384 425 ...
##  $ : int [1:1275] 21 70 118 165 211 256 300 343 385 426 ...
##  $ : int [1:1275] 22 71 119 166 212 257 301 344 386 427 ...
##  $ : int [1:1275] 23 72 120 167 213 258 302 345 387 428 ...
##  $ : int [1:1275] 24 73 121 168 214 259 303 346 388 429 ...
##  $ : int [1:1275] 25 74 122 169 215 260 304 347 389 430 ...
##  $ : int [1:1275] 26 75 123 170 216 261 305 348 390 431 ...
##  $ : int [1:1275] 27 76 124 171 217 262 306 349 391 432 ...
##  $ : int [1:1275] 28 77 125 172 218 263 307 350 392 433 ...
##  $ : int [1:1275] 29 78 126 173 219 264 308 351 393 434 ...
##  $ : int [1:1275] 30 79 127 174 220 265 309 352 394 435 ...
##  $ : int [1:1275] 31 80 128 175 221 266 310 353 395 436 ...
##  $ : int [1:1275] 32 81 129 176 222 267 311 354 396 437 ...
##  $ : int [1:1275] 33 82 130 177 223 268 312 355 397 438 ...
##  $ : int [1:1275] 34 83 131 178 224 269 313 356 398 439 ...
##  $ : int [1:1275] 35 84 132 179 225 270 314 357 399 440 ...
##  $ : int [1:1275] 36 85 133 180 226 271 315 358 400 441 ...
##  $ : int [1:1275] 37 86 134 181 227 272 316 359 401 442 ...
##  $ : int [1:1275] 38 87 135 182 228 273 317 360 402 443 ...
##  $ : int [1:1275] 39 88 136 183 229 274 318 361 403 444 ...
##  $ : int [1:1275] 40 89 137 184 230 275 319 362 404 445 ...
##  $ : int [1:1275] 41 90 138 185 231 276 320 363 405 446 ...
##  $ : int [1:1275] 42 91 139 186 232 277 321 364 406 447 ...
##  $ : int [1:1275] 43 92 140 187 233 278 322 365 407 448 ...
##  $ : int [1:1275] 44 93 141 188 234 279 323 366 408 449 ...
##  $ : int [1:1275] 45 94 142 189 235 280 324 367 409 450 ...
##  $ : int [1:1275] 46 95 143 190 236 281 325 368 410 451 ...
##  $ : int [1:1275] 47 96 144 191 237 282 326 369 411 452 ...
##  $ : int [1:1275] 48 97 145 192 238 283 327 370 412 453 ...
##  $ : int [1:1275] 49 98 146 193 239 284 328 371 413 454 ...
##  $ : int [1:1275] 50 99 147 194 240 285 329 372 414 455 ...
exclHands <- function(exclCards, lstExclude=cardInHand) {
    purrr::reduce(.x=lstExclude[c(exclCards)], .f=union)
}

# Example usages
str(exclHands(c(1)))
##  int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1, 14, 27)))
##  int [1:3676] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1:39)))
##  int [1:21814] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1:49)))
##  int [1:22099] 1 2 3 4 5 6 7 8 9 10 ...
# Confirm correct lengths of exclusion
choose(52,3) - choose(c(51, 49, 13, 3), 3)
## [1]  1275  3676 21814 22099
# Confirm distribution of remaining cards
# Note that 1e9 is a workaround to send a non-existent index to avoid mismatched formats
map_dfr(list(c(), c(1), c(1, 14, 27), c(1:39), c(1:49)), 
       .f=function(x) {
           a <- if(length(x)>0) table(aHands[-exclHands(x),]) else table(aHands[-1e9,])
           as.data.frame(a)
           }, 
       .id="src"
       ) %>%
    tibble::as_tibble() %>%
    mutate(src=c("1"="All", 
                 "2"="Excl 1", 
                 "3"="Excl 3", 
                 "4"="Excl 39", 
                 "5"="Excl 49"
                 )[src]) %>%
    pivot_wider(id_cols="Var1", names_from="src", values_from="Freq", values_fill=0) %>%
    rename(Card=Var1) %>%
    print(n=52)
## # A tibble: 52 × 6
##    Card    All `Excl 1` `Excl 3` `Excl 39` `Excl 49`
##    <fct> <int>    <int>    <int>     <int>     <int>
##  1 1      1275        0        0         0         0
##  2 2      1275     1225     1128         0         0
##  3 3      1275     1225     1128         0         0
##  4 4      1275     1225     1128         0         0
##  5 5      1275     1225     1128         0         0
##  6 6      1275     1225     1128         0         0
##  7 7      1275     1225     1128         0         0
##  8 8      1275     1225     1128         0         0
##  9 9      1275     1225     1128         0         0
## 10 10     1275     1225     1128         0         0
## 11 11     1275     1225     1128         0         0
## 12 12     1275     1225     1128         0         0
## 13 13     1275     1225     1128         0         0
## 14 14     1275     1225        0         0         0
## 15 15     1275     1225     1128         0         0
## 16 16     1275     1225     1128         0         0
## 17 17     1275     1225     1128         0         0
## 18 18     1275     1225     1128         0         0
## 19 19     1275     1225     1128         0         0
## 20 20     1275     1225     1128         0         0
## 21 21     1275     1225     1128         0         0
## 22 22     1275     1225     1128         0         0
## 23 23     1275     1225     1128         0         0
## 24 24     1275     1225     1128         0         0
## 25 25     1275     1225     1128         0         0
## 26 26     1275     1225     1128         0         0
## 27 27     1275     1225        0         0         0
## 28 28     1275     1225     1128         0         0
## 29 29     1275     1225     1128         0         0
## 30 30     1275     1225     1128         0         0
## 31 31     1275     1225     1128         0         0
## 32 32     1275     1225     1128         0         0
## 33 33     1275     1225     1128         0         0
## 34 34     1275     1225     1128         0         0
## 35 35     1275     1225     1128         0         0
## 36 36     1275     1225     1128         0         0
## 37 37     1275     1225     1128         0         0
## 38 38     1275     1225     1128         0         0
## 39 39     1275     1225     1128         0         0
## 40 40     1275     1225     1128        66         0
## 41 41     1275     1225     1128        66         0
## 42 42     1275     1225     1128        66         0
## 43 43     1275     1225     1128        66         0
## 44 44     1275     1225     1128        66         0
## 45 45     1275     1225     1128        66         0
## 46 46     1275     1225     1128        66         0
## 47 47     1275     1225     1128        66         0
## 48 48     1275     1225     1128        66         0
## 49 49     1275     1225     1128        66         0
## 50 50     1275     1225     1128        66         1
## 51 51     1275     1225     1128        66         1
## 52 52     1275     1225     1128        66         1

Possible outcomes for an example hand are explored:

# Include whether the hand qualifies
tblHands <- rnkAllHands %>%
    mutate(qual=(handType!="06. High Card" | C1 >= 12))
tblHands %>% count(handType, qual) %>% mutate(pct=n/sum(n))
## # A tibble: 7 × 4
##   handType      qual      n     pct
##   <chr>         <lgl> <int>   <dbl>
## 1 01. SF        TRUE     48 0.00217
## 2 02. Trips     TRUE     52 0.00235
## 3 03. Straight  TRUE    720 0.0326 
## 4 04. Flush     TRUE   1096 0.0496 
## 5 05. Pair      TRUE   3744 0.169  
## 6 06. High Card FALSE  6720 0.304  
## 7 06. High Card TRUE   9720 0.440
# Pull an example hand number, get rank and exclusions
tmpHand <- 12345
aHands[tmpHand,]
## [1] 13 17 31
tblHands %>% filter(handNum==tmpHand)
## # A tibble: 1 × 12
##   handType    C1    C2    C3 nSameRank rankOfType handNum chgType chgRank chgVal
##   <chr>    <dbl> <dbl> <dbl>     <int>      <int>   <int> <lgl>   <lgl>    <dbl>
## 1 06. Hig…    13     5     4        60        113   12345 FALSE   FALSE        0
## # ℹ 2 more variables: rankOverall <dbl>, qual <lgl>
tmpRank <- tblHands %>% filter(handNum==tmpHand) %>% pull(rankOverall)
tmpRes <- tblHands %>%
    select(handNum, rankOverall, qual) %>%
    filter(!(handNum %in% exclHands(aHands[tmpHand,,drop=TRUE]))) %>%
    mutate(res=case_when(rankOverall<tmpRank~"L", rankOverall>tmpRank~"W", TRUE~"T")) %>%
    count(qual, res) %>%
    mutate(pct=n/sum(n), 
           resPlay=case_when(!qual~1, res=="L"~-2, res=="W"~2, TRUE~0)
           )
tmpRes
## # A tibble: 4 × 5
##   qual  res       n     pct resPlay
##   <lgl> <chr> <int>   <dbl>   <dbl>
## 1 FALSE W      5754 0.312         1
## 2 TRUE  L     10089 0.548        -2
## 3 TRUE  T        26 0.00141       0
## 4 TRUE  W      2555 0.139         2
cat("EV of playing:", 
    round(sum(tmpRes$pct*tmpRes$resPlay), 3), 
    "vs. EV of folding: -1.000\n"
    )
## EV of playing: -0.506 vs. EV of folding: -1.000

The process is converted to functional form:

assessHand <- function(hn, 
                       dbHands=tblHands, 
                       mtxHands=aHands, 
                       exclList=exclHands
                       ) {
    
    # FUNCTION ARGUMENTS:
    # hn: hand number to evaluate
    # dbHands: data frame containing map of hand number to overall rank and qualification
    # mtxHands: matrix of hands to cards, sorted by hand number
    # exclList: list of hands excluded by card
    
    # Get rank of requested hand
    keyRank <- dbHands %>% filter(handNum==hn) %>% pull(rankOverall)

    # Get and return outcomes of requested hand
    dbHands %>%
        select(handNum, rankOverall, qual) %>%
        filter(!(handNum %in% exclList(mtxHands[hn,,drop=TRUE]))) %>%
        mutate(res=case_when(rankOverall<keyRank~"L", 
                             rankOverall>keyRank~"W", 
                             TRUE~"T"
                             )
               ) %>%
        count(qual, res) %>%
        mutate(pct=n/sum(n), handNum=hn)

}

assessHand(12345)
## # A tibble: 4 × 5
##   qual  res       n     pct handNum
##   <lgl> <chr> <int>   <dbl>   <dbl>
## 1 FALSE W      5754 0.312     12345
## 2 TRUE  L     10089 0.548     12345
## 3 TRUE  T        26 0.00141   12345
## 4 TRUE  W      2555 0.139     12345
map_dfr(.x=1:100, .f=function(x) assessHand(x))
## # A tibble: 400 × 5
##    qual  res       n      pct handNum
##    <lgl> <chr> <int>    <dbl>   <int>
##  1 FALSE W      5706 0.310          1
##  2 TRUE  L        41 0.00223        1
##  3 TRUE  T         3 0.000163       1
##  4 TRUE  W     12674 0.688          1
##  5 FALSE W      5721 0.311          2
##  6 TRUE  L       896 0.0486         2
##  7 TRUE  T         3 0.000163       2
##  8 TRUE  W     11804 0.641          2
##  9 FALSE W      5724 0.311          3
## 10 TRUE  L       886 0.0481         3
## # ℹ 390 more rows

Every hand is assessed, with process cached and results saved as RDS:

t <- proc.time()
allTCPHandData <- map_dfr(.x=1:nrow(tblHands), .f=function(x) assessHand(x))
allTCPHandData
## # A tibble: 88,164 × 5
##    qual  res       n      pct handNum
##    <lgl> <chr> <int>    <dbl>   <int>
##  1 FALSE W      5706 0.310          1
##  2 TRUE  L        41 0.00223        1
##  3 TRUE  T         3 0.000163       1
##  4 TRUE  W     12674 0.688          1
##  5 FALSE W      5721 0.311          2
##  6 TRUE  L       896 0.0486         2
##  7 TRUE  T         3 0.000163       2
##  8 TRUE  W     11804 0.641          2
##  9 FALSE W      5724 0.311          3
## 10 TRUE  L       886 0.0481         3
## # ℹ 88,154 more rows
proc.time() - t
##    user  system elapsed 
##  861.93   46.34  922.68
saveToRDS(allTCPHandData)

Data are converted to a single line per hand:

# Counts of result by hand type
allTCPHandN <- allTCPHandData %>%
    mutate(name=paste0("n", res, ifelse(qual, "Q", "N"))) %>%
    pivot_wider(id_cols="handNum", names_from="name", values_from="n", values_fill=0)
allTCPHandN
## # A tibble: 22,100 × 7
##    handNum   nWN   nLQ   nTQ   nWQ   nLN   nTN
##      <int> <int> <int> <int> <int> <int> <int>
##  1       1  5706    41     3 12674     0     0
##  2       2  5721   896     3 11804     0     0
##  3       3  5724   886     3 11811     0     0
##  4       4  5724   877     3 11820     0     0
##  5       5  5724   865     3 11832     0     0
##  6       6  5724   850     3 11847     0     0
##  7       7  5724   832     3 11865     0     0
##  8       8  5709   811     3 11901     0     0
##  9       9  5694   787     3 11940     0     0
## 10      10  6195   764     3 11462     0     0
## # ℹ 22,090 more rows
# Percentages of result by hand type
allTCPHandPct <- allTCPHandData %>%
    mutate(name=paste0("p", res, ifelse(qual, "Q", "N"))) %>%
    pivot_wider(id_cols="handNum", names_from="name", values_from="pct", values_fill=0)
allTCPHandPct
## # A tibble: 22,100 × 7
##    handNum   pWN     pLQ      pTQ   pWQ   pLN   pTN
##      <int> <dbl>   <dbl>    <dbl> <dbl> <dbl> <dbl>
##  1       1 0.310 0.00223 0.000163 0.688     0     0
##  2       2 0.311 0.0486  0.000163 0.641     0     0
##  3       3 0.311 0.0481  0.000163 0.641     0     0
##  4       4 0.311 0.0476  0.000163 0.642     0     0
##  5       5 0.311 0.0469  0.000163 0.642     0     0
##  6       6 0.311 0.0461  0.000163 0.643     0     0
##  7       7 0.311 0.0452  0.000163 0.644     0     0
##  8       8 0.310 0.0440  0.000163 0.646     0     0
##  9       9 0.309 0.0427  0.000163 0.648     0     0
## 10      10 0.336 0.0415  0.000163 0.622     0     0
## # ℹ 22,090 more rows
# Identical results by hand type
eqResults <- allTCPHandN %>%
    arrange(nWN, nLQ, nTQ, nWQ, nLN, nTN) %>%
    mutate(oa=apply(select(., -handNum), 1, FUN=function(x) paste0(x, collapse="-")), 
           isNew=ifelse(row_number()==1, 1, ifelse(oa!=lag(oa), 1, 0)), 
           typeNum=cumsum(isNew)
           )
eqResults
## # A tibble: 22,100 × 10
##    handNum   nWN   nLQ   nTQ   nWQ   nLN   nTN oa                  isNew typeNum
##      <int> <int> <int> <int> <int> <int> <int> <chr>               <dbl>   <dbl>
##  1    1849     0 13147     0     0  5251    26 0-13147-0-0-5251-26     1       1
##  2    1862     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  3    1916     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  4    1929     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  5    2239     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  6    2280     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  7    2989     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  8    3002     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
##  9    3091     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
## 10    3104     0 13147     0     0  5251    26 0-13147-0-0-5251-26     0       1
## # ℹ 22,090 more rows
summary(eqResults)
##     handNum           nWN            nLQ             nTQ             nWQ       
##  Min.   :    1   Min.   :   0   Min.   :    0   Min.   : 0.00   Min.   :    0  
##  1st Qu.: 5526   1st Qu.:4481   1st Qu.: 4737   1st Qu.: 0.00   1st Qu.:    0  
##  Median :11050   Median :5717   Median : 9119   Median : 3.00   Median : 3565  
##  Mean   :11050   Mean   :4794   Mean   : 8401   Mean   :12.66   Mean   : 4408  
##  3rd Qu.:16575   3rd Qu.:5748   3rd Qu.:13114   3rd Qu.:25.00   3rd Qu.: 8299  
##  Max.   :22100   Max.   :6720   Max.   :13181   Max.   :26.00   Max.   :13228  
##       nLN              nTN              oa                isNew        
##  Min.   :   0.0   Min.   : 0.000   Length:22100       Min.   :0.00000  
##  1st Qu.:   0.0   1st Qu.: 0.000   Class :character   1st Qu.:0.00000  
##  Median :   0.0   Median : 0.000   Mode  :character   Median :0.00000  
##  Mean   : 800.7   Mean   : 7.723                      Mean   :0.07475  
##  3rd Qu.: 756.0   3rd Qu.:25.000                      3rd Qu.:0.00000  
##  Max.   :5251.0   Max.   :26.000                      Max.   :1.00000  
##     typeNum    
##  Min.   :   1  
##  1st Qu.: 333  
##  Median : 830  
##  Mean   : 815  
##  3rd Qu.:1261  
##  Max.   :1652

There are 1,652 unique combinations of outcomes across the 22,100 starting hands

The optimal decision is included, based on hand percentages:

allTCPHandAction <- allTCPHandPct %>%
    mutate(evPlay=(pWN+pTN+pLN)+2*(pWQ-pLQ), 
           action=ifelse(evPlay>=-1, "Play", "Fold"), 
           evOptimal=ifelse(action=="Fold", -1, evPlay)
           ) %>%
    select(handNum, evPlay, action, evOptimal)
allTCPHandAction
## # A tibble: 22,100 × 4
##    handNum evPlay action evOptimal
##      <int>  <dbl> <chr>      <dbl>
##  1       1   1.68 Play        1.68
##  2       2   1.49 Play        1.49
##  3       3   1.50 Play        1.50
##  4       4   1.50 Play        1.50
##  5       5   1.50 Play        1.50
##  6       6   1.50 Play        1.50
##  7       7   1.51 Play        1.51
##  8       8   1.51 Play        1.51
##  9       9   1.52 Play        1.52
## 10      10   1.50 Play        1.50
## # ℹ 22,090 more rows
allTCPHandAction %>%
    summarize(across(starts_with("ev"), mean))
## # A tibble: 1 × 2
##   evPlay evOptimal
##    <dbl>     <dbl>
## 1 -0.129   -0.0866
allTCPHandAction %>%
    group_by(action) %>%
    summarize(n=n(), across(starts_with("ev"), mean))
## # A tibble: 2 × 4
##   action     n evPlay evOptimal
##   <chr>  <int>  <dbl>     <dbl>
## 1 Fold    7200 -1.13     -1    
## 2 Play   14900  0.355     0.355
allTCPHandAction %>%
    full_join(select(tblHands, handNum, handType), by="handNum") %>%
    group_by(handType, action) %>%
    summarize(n=n(), across(starts_with("ev"), mean), .groups="drop")
## # A tibble: 7 × 5
##   handType      action     n evPlay evOptimal
##   <chr>         <chr>  <int>  <dbl>     <dbl>
## 1 01. SF        Play      48  1.69      1.69 
## 2 02. Trips     Play      52  1.68      1.68 
## 3 03. Straight  Play     720  1.61      1.61 
## 4 04. Flush     Play    1096  1.45      1.45 
## 5 05. Pair      Play    3744  1.01      1.01 
## 6 06. High Card Fold    7200 -1.13     -1    
## 7 06. High Card Play    9240 -0.152    -0.152

The decision function for hands with only a high card is plotted:

allTCPHandAction %>% 
    full_join(select(tblHands, handNum, handType, C1, C2), by="handNum") %>%
    filter(handType=="06. High Card") %>%
    group_by(C1, C2) %>%
    summarize(pctPlay=mean(action=="Play"), 
              evPlay=mean(evPlay), 
              n=n(), 
              .groups="drop"
              ) %>%
    ggplot(aes(y=factor(C1, levels=5:14), x=factor(C2, levels=13:3))) + 
    geom_tile(aes(fill=pctPlay)) + 
    geom_text(aes(label=paste0(round(100*pctPlay, 1), "%\n(", round(evPlay, 3), ")")), 
              size=2.5
              ) +
    scale_fill_continuous("% Play", low="white", high="lightgreen") + 
    labs(title="Percentage of hands played optimally (high-card only hands)",
         subtitle="Cell contents are % played (mean EV of playing)",
         y="Highest card (14=A, 13=K, 12=Q, 11=J)", 
         x="Second-highest card (13=K, 12=Q, 11=J)"
         )

The EV for hands with a pair is plotted:

allTCPHandAction %>% 
    full_join(select(tblHands, handNum, handType, C1, C2), by="handNum") %>%
    filter(handType=="05. Pair") %>%
    group_by(C1, C2) %>%
    summarize(pctPlay=mean(action=="Play"), 
              evPlay=mean(evPlay), 
              n=n(), 
              .groups="drop"
              ) %>%
    ggplot(aes(y=factor(C1, levels=2:14), x=factor(C2, levels=14:2))) + 
    geom_tile(aes(fill=evPlay)) +
    geom_text(aes(label=paste0(round(evPlay, 3))), 
              size=2.5
              ) + 
    scale_fill_continuous("EV Play", low="white", high="lightgreen", limits=c(0, 2)) +
    labs(title="EV of pair hands (pair hands always played)",
         subtitle="Cell contents are mean EV of playing",
         y="Paired card (14=A, 13=K, 12=Q, 11=J)", 
         x="Kicker (14=A, 13=K, 12=Q, 11=J)"
         )

Due to the unlikelihood of a kicker being needed when comparing a player and dealer pair, there are some non-monotonic features to EV. In general, a better kicker is less likely to block dealer qualifying (not A/K/Q) but more likely to block higher dealer pairs. For example, with a pair of 4s, J kicker has EV +0.81 while Q kicker has EV +0.79, and 5 kicker has EV +0.80 while 3 kicker has EV +0.77

The impact of suits on EV is explored:

allTCPHandAction %>% 
    full_join(select(tblHands, handNum, handType, C1, C2, C3), by="handNum") %>%
    full_join(select(tblSuits, handNum, nMax), by="handNum") %>%
    filter(handType=="06. High Card") %>%
    group_by(C1, C2, nMax) %>%
    summarize(n=n(), 
              evMean=mean(evPlay), 
              evMax=max(evPlay), 
              evMin=min(evPlay), 
              .groups="drop"
              ) %>%
    arrange(C1, C2, nMax) %>%
    group_by(C1, C2) %>%
    summarize(delta_21=last(evMean)-first(evMean), .groups="drop") %>%
    ggplot(aes(y=factor(C1, levels=5:14), x=factor(C2, levels=13:3))) + 
    geom_tile(aes(fill=delta_21)) +
    geom_text(aes(label=paste0("", round(delta_21, 4), "")), 
              size=2.5
              ) + 
    scale_fill_continuous("Delta EV", high="white", low="pink") +
    labs(title="Mean difference in EV for suits 2-1-0 vs. 1-1-1 (high card only)",
         subtitle="Negative reflects 2-1-0 having lower mean EV than 1-1-1",
         y="Highest card (14=A, 13=K, 12=Q, 11=J)", 
         x="Second-highest card (13=K, 12=Q, 11=J)"
         )

Suits play a very small role in mean EV for given high-card hand ranks, with impact ranging from ~0.0020 better for AKx rainbow to ~0.0005 better for Q32 rainbow

Function assessHand() is re-written to allow for specifying cards to be excluded:

oldAssess <- assessHand

assessHand <- function(hn=NULL, 
                       keyRank=NULL,
                       dbHands=tblHands, 
                       mtxHands=aHands,
                       exclCards=NULL,
                       exclList=exclHands
                       ) {
    
    # FUNCTION ARGUMENTS:
    # hn: hand number to evaluate (if NULL, need to pass keyRank and exclCards instead)
    # keyRank: hand rank for use in dbHands (if NULL, calculated from hn)
    # dbHands: data frame containing map of hand number to overall rank and qualification
    # mtxHands: matrix of hands to cards, sorted by hand number
    # exclCards: cards not available for other hands (if NULL, the cards in hn)
    # exclList: list of hands excluded by card
    
    # Get rank of requested hand
    if(is.null(keyRank)) {
        if(is.null(hn)) stop("\nMust pass either hn or keyRank\n")
        keyRank <- dbHands %>% filter(handNum==hn) %>% pull(rankOverall)
    }
    
    # Get cards to be excluded
    if(is.null(exclCards)) {
        if(is.null(hn)) stop("\nMust pass either hn or exclCards\n")
        exclCards <- mtxHands[hn,,drop=TRUE]
    }
    
    # Get and return outcomes of requested hand
    dbHands %>%
        select(handNum, rankOverall, qual) %>%
        filter(!(handNum %in% exclList(exclCards))) %>%
        mutate(res=case_when(rankOverall<keyRank~"L", 
                             rankOverall>keyRank~"W", 
                             TRUE~"T"
                             )
               ) %>%
        count(qual, res) %>%
        mutate(pct=n/sum(n), 
               handNum=ifelse(is.null(hn), NA_integer_, as.integer(hn)), 
               keyRank=keyRank
               )

}

# hn=123 has cards 1, 4, 28 (A42 with A-4 suited) and is of rank 531
oldAssess(hn=123)
## # A tibble: 4 × 5
##   qual  res       n     pct handNum
##   <lgl> <chr> <int>   <dbl>   <dbl>
## 1 FALSE W      5728 0.311       123
## 2 TRUE  L      7353 0.399       123
## 3 TRUE  T        25 0.00136     123
## 4 TRUE  W      5318 0.289       123
assessHand(hn=123)
## # A tibble: 4 × 6
##   qual  res       n     pct handNum keyRank
##   <lgl> <chr> <int>   <dbl>   <int>   <dbl>
## 1 FALSE W      5728 0.311       123     531
## 2 TRUE  L      7353 0.399       123     531
## 3 TRUE  T        25 0.00136     123     531
## 4 TRUE  W      5318 0.289       123     531
assessHand(keyRank=531, exclCards=c(1, 4, 28))
## # A tibble: 4 × 6
##   qual  res       n     pct handNum keyRank
##   <lgl> <chr> <int>   <dbl>   <int>   <dbl>
## 1 FALSE W      5728 0.311        NA     531
## 2 TRUE  L      7353 0.399        NA     531
## 3 TRUE  T        25 0.00136      NA     531
## 4 TRUE  W      5318 0.289        NA     531
assessHand(keyRank=531, exclCards=c(1, 4, 28, 38)) # exclude an extra offsuit Q
## # A tibble: 4 × 6
##   qual  res       n     pct handNum keyRank
##   <lgl> <chr> <int>   <dbl>   <int>   <dbl>
## 1 FALSE W      5728 0.331        NA     531
## 2 TRUE  L      6951 0.402        NA     531
## 3 TRUE  T        25 0.00145      NA     531
## 4 TRUE  W      4592 0.265        NA     531

The lowest playing hand is explored for the impact of each additional card excluded:

tmpHand <- allTCPHandAction %>% 
    filter(action=="Play") %>%
    arrange(evPlay) %>%
    slice(1) %>%
    pull(handNum)
tmpRank <- tblHands %>%
    filter(handNum==tmpHand) %>%
    pull(rankOverall)
tmpCards <- aHands[tmpHand,,drop=TRUE]

cat("\nHand number:", 
    tmpHand, 
    "with cards [", 
    paste0(tmpCards, sep=""), 
    "] and rank", 
    tmpRank, 
    "will be explored\n"
    )
## 
## Hand number: 3742 with cards [ 4 6 25 ] and rank 621 will be explored
assessHand(hn=tmpHand)
## # A tibble: 4 × 6
##   qual  res       n     pct handNum keyRank
##   <lgl> <chr> <int>   <dbl>   <int>   <dbl>
## 1 FALSE W      5751 0.312      3742     621
## 2 TRUE  L     12343 0.670      3742     621
## 3 TRUE  T        25 0.00136    3742     621
## 4 TRUE  W       305 0.0166     3742     621
assessHand(keyRank=tmpRank, exclCards=tmpCards)
## # A tibble: 4 × 6
##   qual  res       n     pct handNum keyRank
##   <lgl> <chr> <int>   <dbl>   <int>   <dbl>
## 1 FALSE W      5751 0.312        NA     621
## 2 TRUE  L     12343 0.670        NA     621
## 3 TRUE  T        25 0.00136      NA     621
## 4 TRUE  W       305 0.0166       NA     621
tmpWL <- map_dfr(.x=setdiff(1:52, tmpCards), 
                 .f=function(x) {
                     assessHand(keyRank=tmpRank, exclCards=c(tmpCards, x)) %>%
                         mutate(extraCard=x)
                     }
                 )
tmpWL
## # A tibble: 196 × 7
##    qual  res       n     pct handNum keyRank extraCard
##    <lgl> <chr> <int>   <dbl>   <int>   <dbl>     <int>
##  1 FALSE W      5751 0.333        NA     621         1
##  2 TRUE  L     11215 0.648        NA     621         1
##  3 TRUE  T        25 0.00145      NA     621         1
##  4 TRUE  W       305 0.0176       NA     621         1
##  5 FALSE W      5271 0.305        NA     621         2
##  6 TRUE  L     11735 0.678        NA     621         2
##  7 TRUE  T        25 0.00145      NA     621         2
##  8 TRUE  W       265 0.0153       NA     621         2
##  9 FALSE W      5283 0.305        NA     621         3
## 10 TRUE  L     11723 0.678        NA     621         3
## # ℹ 186 more rows
tmpWL %>%
    mutate(outc=case_when(!qual ~ 1, res=="L" ~ -2, res=="W" ~ 2, res=="T" ~ 0)) %>%
    group_by(extraCard) %>%
    summarize(ev=sum(outc*n)/sum(n), pQual=sum(qual*n)/sum(n)) %>%
    mutate(ecSuit=1+((extraCard-1) %/% 13), 
           ecRank=1+((extraCard-1) %% 13), 
           ecRank=ifelse(ecRank==1, 14, ecRank)
           ) %>%
    ggplot(aes(x=factor(ecRank, levels=14:2), y=ev)) +
    geom_text(aes(y=ev+0.005*ifelse(ev>-1, -1, 1), 
                  label=round(ev, 3), 
                  hjust=ifelse(ev>-1, 1, 0)
                  ), 
              size=2.5
              ) +
    geom_point() + 
    coord_flip() +
    facet_wrap(~ecSuit, nrow=1) +
    geom_hline(yintercept=-1, lty=2) +
    labs(y="EV", 
         x="Card Rank", 
         title=paste0("EV of playing ", 
                      paste0(tmpCards, collapse="-"), 
                      " if one extra card excluded"
                      ), 
         subtitle="Facetted by suit"
         )

Data are integrated to include a full picture of possible outcomes, for use in sampling:

# Table of ante bonus and pair-plus bonus
tblBonus <- tibble::tibble(handType=sort(unique(tblHandTypes$handType)),
                           ppBonus=c(40, 30, 6, 3, 1, -1), 
                           anteBonus=c(5, 4, 1, 0, 0, 0)
                           )
tblBonus
## # A tibble: 6 × 3
##   handType      ppBonus anteBonus
##   <chr>           <dbl>     <dbl>
## 1 01. SF             40         5
## 2 02. Trips          30         4
## 3 03. Straight        6         1
## 4 04. Flush           3         0
## 5 05. Pair            1         0
## 6 06. High Card      -1         0
# Add hand type, optimal action, and pivot longer
# Add qualification, win/loss, main payout, ante bonus, pair-plus bonus
allTCPOutcome <- allTCPHandN %>%
    full_join(select(tblHandTypes, handNum, handType), by="handNum") %>%
    full_join(select(allTCPHandAction, handNum, action), by="handNum") %>%
    pivot_longer(cols=-c(handNum, handType, action), values_to="n") %>%
    mutate(dealerQual=str_sub(name, 3, 3), 
           playerRaw=str_sub(name, 2, 2),
           antePay=case_when(action=="Fold"~-1, 
                             dealerQual=="N"~1, 
                             playerRaw=="W"~1, 
                             playerRaw=="T"~0, 
                             playerRaw=="L"~-1
                             ), 
           playPay=case_when(action=="Fold"~0, 
                             dealerQual=="N"~0, 
                             playerRaw=="W"~1, 
                             playerRaw=="T"~0, 
                             playerRaw=="L"~-1
                             )
           ) %>%
    left_join(tblBonus, by="handType")
allTCPOutcome
## # A tibble: 132,600 × 11
##    handNum handType  action name      n dealerQual playerRaw antePay playPay
##      <int> <chr>     <chr>  <chr> <int> <chr>      <chr>       <dbl>   <dbl>
##  1       1 01. SF    Play   nWN    5706 N          W               1       0
##  2       1 01. SF    Play   nLQ      41 Q          L              -1      -1
##  3       1 01. SF    Play   nTQ       3 Q          T               0       0
##  4       1 01. SF    Play   nWQ   12674 Q          W               1       1
##  5       1 01. SF    Play   nLN       0 N          L               1       0
##  6       1 01. SF    Play   nTN       0 N          T               1       0
##  7       2 04. Flush Play   nWN    5721 N          W               1       0
##  8       2 04. Flush Play   nLQ     896 Q          L              -1      -1
##  9       2 04. Flush Play   nTQ       3 Q          T               0       0
## 10       2 04. Flush Play   nWQ   11804 Q          W               1       1
## # ℹ 132,590 more rows
## # ℹ 2 more variables: ppBonus <dbl>, anteBonus <dbl>
# Confirm expected values
tmpSum <- allTCPOutcome %>%
    select(-handNum) %>%
    summarize(across(where(is.numeric), .fns=function(x) sum(x*n)/sum(n)))
tmpSum %>% select(-n)
## # A tibble: 1 × 4
##   antePay playPay ppBonus anteBonus
##     <dbl>   <dbl>   <dbl>     <dbl>
## 1  -0.101  0.0146 -0.0728    0.0529
tmpSum %>% select(-n, -ppBonus) %>% sum()
## [1] -0.03372981

Expected values are consistent with the summary provided by Wizard of Odds website:

Hands are simulated, with summary statistics calculated:

nSims <- 10000
nPer <- 100

set.seed(23110314)

# Example simulation
allTCPSim <- allTCPOutcome %>% 
    sample_n(size=nSims*nPer, replace=TRUE, weight=n) %>%
    mutate(simNumber=rep(1:nSims, each=nPer))
allTCPSim
## # A tibble: 1,000,000 × 12
##    handNum handType      action name      n dealerQual playerRaw antePay playPay
##      <int> <chr>         <chr>  <chr> <int> <chr>      <chr>       <dbl>   <dbl>
##  1   11774 06. High Card Play   nLQ   11787 Q          L              -1      -1
##  2    9505 06. High Card Play   nLQ   11738 Q          L              -1      -1
##  3   17471 06. High Card Play   nWN    5743 N          W               1       0
##  4   13592 05. Pair      Play   nWQ   10555 Q          W               1       1
##  5   20309 06. High Card Fold   nLN    4116 N          L              -1       0
##  6   14505 06. High Card Fold   nLN    3569 N          L              -1       0
##  7   18505 06. High Card Play   nLQ    5670 Q          L              -1      -1
##  8    2107 05. Pair      Play   nWQ   10767 Q          W               1       1
##  9    1074 06. High Card Play   nWQ    5521 Q          W               1       1
## 10   16537 05. Pair      Play   nLQ    2414 Q          L              -1      -1
## # ℹ 999,990 more rows
## # ℹ 3 more variables: ppBonus <dbl>, anteBonus <dbl>, simNumber <int>
# Example summary of outcomes
allTCPSim %>%
    group_by(simNumber) %>%
    summarize(n=n(), across(c(antePay, playPay, ppBonus, anteBonus), sum)) %>%
    mutate(mainPay=antePay+playPay+anteBonus) %>%
    select(-n, -simNumber) %>%
    summary()
##     antePay          playPay           ppBonus          anteBonus     
##  Min.   :-46.00   Min.   :-26.000   Min.   :-76.000   Min.   : 0.000  
##  1st Qu.:-16.00   1st Qu.: -3.000   1st Qu.:-28.000   1st Qu.: 3.000  
##  Median :-10.00   Median :  2.000   Median :-12.000   Median : 5.000  
##  Mean   :-10.03   Mean   :  1.462   Mean   : -7.122   Mean   : 5.287  
##  3rd Qu.: -4.00   3rd Qu.:  6.000   3rd Qu.: 10.000   3rd Qu.: 7.000  
##  Max.   : 30.00   Max.   : 28.000   Max.   :164.000   Max.   :26.000  
##     mainPay       
##  Min.   :-69.000  
##  1st Qu.:-14.000  
##  Median : -3.000  
##  Mean   : -3.284  
##  3rd Qu.:  8.000  
##  Max.   : 66.000

Simulation means are very close to theoretical game EV

Results by simulation are assessed assuming a 2:1 ratio of Ante-Bonus:

allTCPSim %>%
    mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
    group_by(simNumber) %>%
    mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
    summarize(minOverall=min(ovCum), 
              sumOverall=sum(overall), 
              minMain=min(ovMain),
              sumMain=sum(mainBet),
              minBonus=min(ovBonus),
              sumBonus=sum(ppBonus),
              pQ=mean(dealerQual=="Q")
              ) %>%
    summary()
##    simNumber       minOverall        sumOverall         minMain      
##  Min.   :    1   Min.   :-192.00   Min.   :-187.00   Min.   :-70.00  
##  1st Qu.: 2501   1st Qu.: -66.00   1st Qu.: -51.00   1st Qu.:-20.00  
##  Median : 5000   Median : -41.00   Median : -18.00   Median :-12.00  
##  Mean   : 5000   Mean   : -45.35   Mean   : -13.69   Mean   :-13.96  
##  3rd Qu.: 7500   3rd Qu.: -20.00   3rd Qu.:  20.00   3rd Qu.: -6.00  
##  Max.   :10000   Max.   :  42.00   Max.   : 232.00   Max.   :  6.00  
##     sumMain           minBonus         sumBonus             pQ        
##  Min.   :-69.000   Min.   :-76.00   Min.   :-76.000   Min.   :0.5200  
##  1st Qu.:-14.000   1st Qu.:-33.00   1st Qu.:-28.000   1st Qu.:0.6600  
##  Median : -3.000   Median :-22.00   Median :-12.000   Median :0.7000  
##  Mean   : -3.284   Mean   :-22.95   Mean   : -7.122   Mean   :0.6955  
##  3rd Qu.:  8.000   3rd Qu.:-12.00   3rd Qu.: 10.000   3rd Qu.:0.7300  
##  Max.   : 66.000   Max.   : 40.00   Max.   :164.000   Max.   :0.8700

Theoretical means and standard errors are calculated for comparison:

sdCols <- c("qual", "fold",
            "win", "tie", "lose", 
            "antePay", "playPay", "anteBonus", "mainOutcome", 
            "ppBonus", "outcome2_1"
            )

allTCPOutcome %>%
    mutate(mainOutcome=(antePay+playPay+anteBonus), 
           outcome2_1=2*mainOutcome+ppBonus, 
           qual=dealerQual=="Q", 
           win=playerRaw=="W", 
           tie=playerRaw=="T", 
           lose=playerRaw=="L", 
           fold=action=="Fold"
           ) %>%
    pivot_longer(cols=all_of(sdCols), names_to="metric", values_to="metricVal") %>%
    group_by(metric) %>%
    summarize(mu=sum(metricVal*n)/sum(n), 
              variance=sum(n*metricVal**2)/sum(n)-mu**2, 
              sd=sqrt(variance), 
              se100=sd/10
              ) %>%
    mutate(ord=match(metric, sdCols)) %>%
    arrange(ord) %>%
    select(-ord)
## # A tibble: 11 × 5
##    metric            mu variance     sd   se100
##    <chr>          <dbl>    <dbl>  <dbl>   <dbl>
##  1 qual         0.696    0.212   0.460  0.0460 
##  2 fold         0.326    0.220   0.469  0.0469 
##  3 win          0.499    0.250   0.500  0.0500 
##  4 tie          0.00111  0.00111 0.0332 0.00332
##  5 lose         0.499    0.250   0.500  0.0500 
##  6 antePay     -0.101    0.989   0.995  0.0995 
##  7 playPay      0.0146   0.463   0.681  0.0681 
##  8 anteBonus    0.0529   0.122   0.349  0.0349 
##  9 mainOutcome -0.0337   2.69    1.64   0.164  
## 10 ppBonus     -0.0728   8.12    2.85   0.285  
## 11 outcome2_1  -0.140   27.9     5.29   0.529

Simulated and theoretical means and standard errors are compared:

sdCols <- c("qual", "fold",
            "win", "tie", "lose", 
            "antePay", "playPay", "anteBonus", "mainOutcome", 
            "ppBonus", "outcome2_1"
            )

tcpTheoretical <- allTCPOutcome %>%
    mutate(mainOutcome=(antePay+playPay+anteBonus), 
           outcome2_1=2*mainOutcome+ppBonus, 
           qual=dealerQual=="Q", 
           win=playerRaw=="W", 
           tie=playerRaw=="T", 
           lose=playerRaw=="L", 
           fold=action=="Fold"
           ) %>%
    pivot_longer(cols=all_of(sdCols), names_to="metric", values_to="metricVal") %>%
    group_by(metric) %>%
    summarize(mu=sum(metricVal*n)/sum(n), 
              variance=sum(n*metricVal**2)/sum(n)-mu**2, 
              sd=sqrt(variance)
              ) %>%
    mutate(ord=match(metric, sdCols)) %>%
    arrange(ord) %>%
    select(-ord)
tcpTheoretical
## # A tibble: 11 × 4
##    metric            mu variance     sd
##    <chr>          <dbl>    <dbl>  <dbl>
##  1 qual         0.696    0.212   0.460 
##  2 fold         0.326    0.220   0.469 
##  3 win          0.499    0.250   0.500 
##  4 tie          0.00111  0.00111 0.0332
##  5 lose         0.499    0.250   0.500 
##  6 antePay     -0.101    0.989   0.995 
##  7 playPay      0.0146   0.463   0.681 
##  8 anteBonus    0.0529   0.122   0.349 
##  9 mainOutcome -0.0337   2.69    1.64  
## 10 ppBonus     -0.0728   8.12    2.85  
## 11 outcome2_1  -0.140   27.9     5.29
tcpSimSummary <- allTCPSim %>%
    mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
    group_by(simNumber) %>%
    mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
    summarize(minOverall=min(ovCum), 
              sumOverall=sum(overall), 
              minMain=min(ovMain),
              sumMain=sum(mainBet),
              minBonus=min(ovBonus),
              sumBonus=sum(ppBonus),
              pQ=mean(dealerQual=="Q")
              )
tcpSimSummary
## # A tibble: 10,000 × 8
##    simNumber minOverall sumOverall minMain sumMain minBonus sumBonus    pQ
##        <int>      <dbl>      <dbl>   <dbl>   <dbl>    <dbl>    <dbl> <dbl>
##  1         1        -15          4      -5       6       -8       -8  0.67
##  2         2        -52         13     -11      -2      -32       17  0.71
##  3         3        -96        -90     -33     -31      -31      -28  0.74
##  4         4        -57          3     -16       4      -28       -5  0.66
##  5         5        -40        -32     -12      -8      -17      -16  0.66
##  6         6        -82        -66     -23     -20      -36      -26  0.77
##  7         7        -97        -83     -36     -25      -35      -33  0.75
##  8         8        -31        -30     -10      -9      -14      -12  0.66
##  9         9        -73        -73     -17     -17      -39      -39  0.65
## 10        10        -50        -50     -11     -11      -28      -28  0.68
## # ℹ 9,990 more rows
# Comparison of the main bet and bonus bet
tmpCompare <- tcpSimSummary %>%
    select(simNumber, sumMain, sumBonus) %>%
    colRenamer(vecRename=c("sumMain"="mainOutcome", "sumBonus"="ppBonus")) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    group_by(name) %>%
    summarize(simMean=mean(value), simSD=sd(value)) %>%
    left_join(tcpTheoretical, by=c("name"="metric"))
tmpCompare
## # A tibble: 2 × 6
##   name        simMean simSD      mu variance    sd
##   <chr>         <dbl> <dbl>   <dbl>    <dbl> <dbl>
## 1 mainOutcome   -3.28  16.5 -0.0337     2.69  1.64
## 2 ppBonus       -7.12  28.8 -0.0728     8.12  2.85
# Comparison of actual and expected SD
tmpCompare %>%
    mutate(sd100=sd*10) %>%
    select(metric=name, sim=simSD, theory=sd100) %>%
    pivot_longer(cols=-c(metric)) %>%
    ggplot(aes(x=metric, y=value)) + 
    geom_point(aes(color=name)) +
    coord_flip() + 
    labs(title="Theoretical and actual SD for 10,000 simulations of 100 hands", 
         y="Standard Deviation (SD)", 
         x=NULL
         ) + 
    scale_color_discrete(NULL) + 
    lims(y=c(0, NA))

# Comparison of actual and expected mean
tmpCompare %>%
    mutate(mean100=mu*100, 
           theorySE=sqrt(100)*sd/sqrt(10000), 
           oneUp=mean100+theorySE, 
           oneDown=mean100-theorySE
           ) %>%
    select(metric=name, 
           sim=simMean, 
           theoryMean=mean100, 
           theorySE, 
           oneUp, 
           oneDown
           ) %>%
    ggplot(aes(x=metric)) + 
    geom_point(aes(y=sim), color="blue") +
    geom_point(aes(y=theoryMean), color="green") +
    geom_errorbar(aes(ymin=oneDown, ymax=oneUp), color="green") +
    coord_flip() + 
    labs(title="Theoretical mean/SE (green) and actual mean (blue)", 
         subtitle="10,000 simulations of 100 hands", 
         y="Mean result of 100 hands (theoretical includes +/- 1 SE)", 
         x=NULL
         )

Associations between main bet and bonus are explored:

# Verify the 2:1 ratio
lm(sumOverall ~ sumMain + sumBonus, data=tcpSimSummary) %>%
    summary()
## Warning in summary.lm(.): essentially perfect fit: summary may be unreliable
## 
## Call:
## lm(formula = sumOverall ~ sumMain + sumBonus, data = tcpSimSummary)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.554e-12 -2.900e-15  1.100e-15  3.500e-15  1.779e-12 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 6.821e-15  5.092e-16 1.340e+01   <2e-16 ***
## sumMain     2.000e+00  3.433e-17 5.825e+16   <2e-16 ***
## sumBonus    1.000e+00  1.967e-17 5.085e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.924e-14 on 9997 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 5.877e+33 on 2 and 9997 DF,  p-value: < 2.2e-16
# Explore associations
tcpSimSummary %>%
    select(-simNumber) %>%
    cor() %>% 
    as.data.frame() %>%
    rownames_to_column("var1") %>%
    tibble::as_tibble() %>%
    pivot_longer(cols=-c(var1), names_to="var2", values_to="corr") %>%
    filter(var1<=var2) %>%
    ggplot(aes(x=var1, y=var2)) +
    geom_tile(aes(fill=corr)) + 
    geom_text(aes(label=round(corr, 3))) + 
    scale_fill_gradient2("Corr:", high="green") + 
    labs(title="Correlation of outcomes in 10,000 simulations, each of 100 hands", 
         y=NULL, 
         x=NULL
         )

Relationships between pQ (percent dealer hands qualifying) and main/bonus bet outcomes are explored:

tcpSimSummary %>%
    select(simNumber, pQ, sumMain, sumBonus) %>%
    pivot_longer(cols=c(sumMain, sumBonus)) %>%
    mutate(r_pQ=round(pQ, 2), r_value=round(5*value)/5) %>%
    count(name, r_pQ, r_value) %>%
    ggplot(aes(x=r_pQ, y=r_value)) + 
    geom_point(aes(size=n), alpha=0.1) + 
    geom_point(data=~summarize(group_by(., name), 
                               across(c(r_pQ, r_value), .fns=function(x) sum(x*n)/sum(n))
                               ), 
               color="red", 
               size=3
               ) +
    geom_smooth(aes(weight=n), method="lm") +
    facet_wrap(~name) + 
    labs(title="Main and bonus bet outcomes associated to dealer percent qualified", 
         subtitle="10,000 simulations, each of 100 hands",
         caption="Red dot is simulation mean",
         x="Percent dealer qualified", y="Outcome (rounded to nearest 5 units)"
         ) + 
    scale_size_continuous("# Sims")
## `geom_smooth()` using formula = 'y ~ x'

Predictive power is explored:

library(ranger) # needs to be called for predict.ranger later

rfTest <- ranger::ranger(sumMain ~ sumBonus + pQ, data=tcpSimSummary)
rfTest
## Ranger result
## 
## Call:
##  ranger::ranger(sumMain ~ sumBonus + pQ, data = tcpSimSummary) 
## 
## Type:                             Regression 
## Number of trees:                  500 
## Sample size:                      10000 
## Number of independent variables:  2 
## Mtry:                             1 
## Target node size:                 5 
## Variable importance mode:         none 
## Splitrule:                        variance 
## OOB prediction error (MSE):       201.4635 
## R squared (OOB):                  0.2585433
lmTest <- lm(sumMain ~ sumBonus + pQ, data=tcpSimSummary)
summary(lmTest)
## 
## Call:
## lm(formula = sumMain ~ sumBonus + pQ, data = tcpSimSummary)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.934  -9.549  -0.282   9.491  49.724 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  44.503314   2.121469   20.98   <2e-16 ***
## sumBonus      0.283669   0.004872   58.22   <2e-16 ***
## pQ          -65.804555   3.042600  -21.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.02 on 9997 degrees of freedom
## Multiple R-squared:  0.2768, Adjusted R-squared:  0.2766 
## F-statistic:  1913 on 2 and 9997 DF,  p-value: < 2.2e-16

The simple linear model and the base random forest each explain ~25% of the variance in main bet outcomes, given bonus bet outcomes and percent dealer qualified

Performance on holdout data is explored:

set.seed(23111015)
idxTrain <- sample(1:nrow(tcpSimSummary), 
                   size=round(0.7*nrow(tcpSimSummary)), 
                   replace=FALSE
                   )

tcpSimTrain <- tcpSimSummary[idxTrain, ]
tcpSimTest <- tcpSimSummary[-idxTrain, ]

rfTest <- ranger::ranger(sumMain ~ sumBonus + pQ, data=tcpSimTrain)
rfTest
## Ranger result
## 
## Call:
##  ranger::ranger(sumMain ~ sumBonus + pQ, data = tcpSimTrain) 
## 
## Type:                             Regression 
## Number of trees:                  500 
## Sample size:                      7000 
## Number of independent variables:  2 
## Mtry:                             1 
## Target node size:                 5 
## Variable importance mode:         none 
## Splitrule:                        variance 
## OOB prediction error (MSE):       204.4578 
## R squared (OOB):                  0.2506552
lmTest <- lm(sumMain ~ sumBonus + pQ, data=tcpSimTrain)
summary(lmTest)
## 
## Call:
## lm(formula = sumMain ~ sumBonus + pQ, data = tcpSimTrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.921  -9.659  -0.295   9.591  49.913 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  42.830397   2.540661   16.86   <2e-16 ***
## sumBonus      0.283564   0.005825   48.68   <2e-16 ***
## pQ          -63.656007   3.645259  -17.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.07 on 6997 degrees of freedom
## Multiple R-squared:  0.2747, Adjusted R-squared:  0.2745 
## F-statistic:  1325 on 2 and 6997 DF,  p-value: < 2.2e-16
# Apply to holdout data
tcpSimPlotData <- tcpSimTest %>%
    mutate(lmPred=predict(lmTest, newdata=.), 
           rfPred=predict(rfTest, data=.)$predictions
           ) %>%
    select(simNumber, sumMain, lmPred, rfPred) %>%
    pivot_longer(cols=-c(simNumber, sumMain)) %>%
    mutate(err=value-sumMain)
tcpSimPlotData
## # A tibble: 6,000 × 5
##    simNumber sumMain name    value   err
##        <int>   <dbl> <chr>   <dbl> <dbl>
##  1         2      -2 lmPred   2.46  4.46
##  2         2      -2 rfPred   5.25  7.25
##  3         9     -17 lmPred  -9.60  7.40
##  4         9     -17 rfPred  -9.43  7.57
##  5        14     -17 lmPred -10.4   6.64
##  6        14     -17 rfPred -23.1  -6.11
##  7        19     -14 lmPred  -7.98  6.02
##  8        19     -14 rfPred  -5.11  8.89
##  9        23     -13 lmPred -15.8  -2.83
## 10        23     -13 rfPred -17.9  -4.89
## # ℹ 5,990 more rows
# Get statistics
tcpSimPlotData %>%
    group_by(name) %>%
    summarize(n=n(), 
              err2Orig=mean((sumMain-mean(sumMain))**2), 
              err2Pred=mean(err**2), 
              r2=1-err2Pred/err2Orig
              )
## # A tibble: 2 × 5
##   name       n err2Orig err2Pred    r2
##   <chr>  <int>    <dbl>    <dbl> <dbl>
## 1 lmPred  3000     269.     193. 0.281
## 2 rfPred  3000     269.     201. 0.254
tcpSimPlotData %>%
    ggplot(aes(y=sumMain, x=value)) + 
    geom_point(alpha=0.1) + 
    facet_wrap(~name) + 
    labs(title="Predictions on unseen data", x="Predicted Main", y="True Main") +
    geom_smooth(method="lm") + 
    geom_abline(intercept=0, slope=1, lty=2, color="red")
## `geom_smooth()` using formula = 'y ~ x'

Both models perform as well on unseen data as on training data. The relationship known to have perfect predictive power is explored:

rfTest <- ranger::ranger(sumOverall ~ sumMain + sumBonus, data=tcpSimTrain)
rfTest
## Ranger result
## 
## Call:
##  ranger::ranger(sumOverall ~ sumMain + sumBonus, data = tcpSimTrain) 
## 
## Type:                             Regression 
## Number of trees:                  500 
## Sample size:                      7000 
## Number of independent variables:  2 
## Mtry:                             1 
## Target node size:                 5 
## Variable importance mode:         none 
## Splitrule:                        variance 
## OOB prediction error (MSE):       3.762365 
## R squared (OOB):                  0.9986871
lmTest <- lm(sumOverall ~ sumMain + sumBonus, data=tcpSimTrain)
summary(lmTest)
## 
## Call:
## lm(formula = sumOverall ~ sumMain + sumBonus, data = tcpSimTrain)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.423e-12 -9.000e-16  1.500e-15  4.500e-15  1.383e-12 
## 
## Coefficients:
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -1.902e-14  1.475e-15 -1.290e+01   <2e-16 ***
## sumMain      2.000e+00  9.919e-17  2.016e+16   <2e-16 ***
## sumBonus     1.000e+00  5.675e-17  1.762e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.193e-13 on 6997 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 7.051e+32 on 2 and 6997 DF,  p-value: < 2.2e-16
# Apply to holdout data
tcpSimPlotData <- tcpSimTest %>%
    mutate(lmPred=predict(lmTest, newdata=.), 
           rfPred=predict(rfTest, data=.)$predictions
           ) %>%
    select(simNumber, sumOverall, lmPred, rfPred) %>%
    pivot_longer(cols=-c(simNumber, sumOverall)) %>%
    mutate(err=value-sumOverall)
tcpSimPlotData
## # A tibble: 6,000 × 5
##    simNumber sumOverall name   value       err
##        <int>      <dbl> <chr>  <dbl>     <dbl>
##  1         2         13 lmPred  13.0  6.04e-14
##  2         2         13 rfPred  12.5 -4.86e- 1
##  3         9        -73 lmPred -73.0 -1.42e-13
##  4         9        -73 rfPred -73.4 -4.37e- 1
##  5        14        -33 lmPred -33.0  3.55e-14
##  6        14        -33 rfPred -32.6  3.70e- 1
##  7        19        -68 lmPred -68.0 -1.56e-13
##  8        19        -68 rfPred -67.3  6.58e- 1
##  9        23        -69 lmPred -69.0 -1.71e-13
## 10        23        -69 rfPred -68.7  2.52e- 1
## # ℹ 5,990 more rows
# Get statistics
tcpSimPlotData %>%
    group_by(name) %>%
    summarize(n=n(), 
              err2Orig=mean((sumOverall-mean(sumOverall))**2), 
              err2Pred=mean(err**2), 
              r2=1-err2Pred/err2Orig
              )
## # A tibble: 2 × 5
##   name       n err2Orig err2Pred    r2
##   <chr>  <int>    <dbl>    <dbl> <dbl>
## 1 lmPred  3000    2813. 1.42e-26 1    
## 2 rfPred  3000    2813. 4.71e+ 0 0.998
tcpSimPlotData %>%
    ggplot(aes(y=sumOverall, x=value)) + 
    geom_point(alpha=0.1) + 
    facet_wrap(~name) + 
    labs(title="Predictions on unseen data", x="Predicted Overall", y="True Overall") +
    geom_smooth(method="lm") + 
    geom_abline(intercept=0, slope=1, lty=2, color="red")
## `geom_smooth()` using formula = 'y ~ x'

Both models perform very well, though the linear model is slightly better at finding the exact linear relationship among the variables

ECDF are explored for simulation summaries:

percKey <- c(0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1)

tcpSimSummary %>%
    select(minMain) %>%
    arrange(minMain) %>%
    mutate(pct=(row_number()-1)/(n()-1)) %>%
    ggplot(aes(x=minMain, y=pct)) + 
    geom_text(data=~filter(., round(pct,4) %in% percKey), 
              aes(label=paste0(minMain, " (", round(1000*pct)/10, "%)"), 
                  hjust=ifelse(pct %in% c(0, 1), 0, 1),
                  vjust=ifelse(pct==1, 1, 0)
                  ), 
              color="red", 
              size=2.5
              ) +
    geom_point(alpha=0.1, size=0.5) + 
    labs(title="ECDF of minimum position of main bet over 100 hands", 
         y=NULL, 
         x="Minimum position of main bet (100 hands)", 
         subtitle="10,000 simulations"
         )

ECDF are updated to include final main bet summaries:

# Need better rounding for percentiles and selection
percKey <- c(0, 0.05, 0.25, 0.5001, 0.75, 0.95, 1)

tcpSimSummary %>%
    select(simNumber, minMain, sumMain) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    arrange(name, value) %>%
    group_by(name) %>%
    mutate(pct=(row_number()-1)/(n()-1)) %>%
    ggplot(aes(x=value, y=pct)) + 
    geom_text(data=~filter(., round(pct,4) %in% percKey), 
              aes(label=paste0(value, " (", round(1000*pct)/10, "%)"), 
                  hjust=ifelse(pct %in% c(0, 1), 0, 1),
                  vjust=ifelse(pct==1, 1, 0)
                  ), 
              color="red", 
              size=2.5
              ) +
    geom_point(alpha=0.1, size=0.5) + 
    facet_wrap(~name) +
    labs(title="ECDF of main bet over 100 hands", 
         y=NULL, 
         x="Main bet (100 hands)", 
         subtitle="10,000 simulations"
         )

The process is updated for better percentile matching:

percKey <- c(0, 0.05, 0.25, 0.5, 0.75, 0.95, 1)

# Function to get closest match in x to y
getBestMatch <- function(x, y, preferLess=TRUE) {
    x <- sort(x)
    op <- vector("numeric", length(y))
    
    for(ctr in 1:length(y)) {
        av <- abs(x-y[ctr])
        wm <- which.min(av)
        op[ctr] <- x[wm[ifelse(isTRUE(preferLess), 1, length(wm))]]
    }
 
    op
    
}

# Check pulling reasonable matches
getBestMatch((0:9999)/9999, percKey)
## [1] 0.000000 0.050005 0.250025 0.500050 0.749975 0.949995 1.000000
tcpSimSummary %>%
    select(simNumber, minMain, sumMain) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    arrange(name, value) %>%
    group_by(name) %>%
    mutate(pct=(row_number()-1)/(n()-1)) %>%
    ggplot(aes(x=value, y=pct)) + 
    geom_text(data=~filter(., pct %in% getBestMatch(.$pct, percKey)), 
              aes(label=paste0(value, " (", round(1000*pct)/10, "%)"), 
                  hjust=ifelse(pct %in% c(0, 1), 0, 1),
                  vjust=ifelse(pct==1, 1, 0)
                  ), 
              color="red", 
              size=2.5
              ) +
    geom_point(alpha=0.1, size=0.5) + 
    facet_wrap(~name) +
    labs(title="ECDF of main bet over 100 hands", 
         y=NULL, 
         x="Main bet (100 hands)", 
         subtitle="10,000 simulations"
         )

The ECDF process is converted to functional form:

getSimECDF <- function(df, 
                       ecdfVars, 
                       uqid="simNumber", 
                       percPoint=c(),
                       labObj=labs()
                       ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame containing simulation data
    # ecdfVars: variables to use in creating ECDF
    # uqid: unique id in simulations
    # percPoint: percentiles to call out values on ECDF plot
    # labObj: labelling object
    
    p1 <- df %>%
        select(all_of(c(uqid, ecdfVars))) %>%
        pivot_longer(cols=-c(all_of(uqid))) %>%
        arrange(name, value) %>%
        group_by(name) %>%
        mutate(pct=(row_number()-1)/(n()-1)) %>%
        ggplot(aes(x=value, y=pct)) + 
        geom_text(data=~filter(., pct %in% getBestMatch(.$pct, percPoint)), 
                  aes(label=paste0(value, " (", round(1000*pct)/10, "%)"), 
                  hjust=ifelse(pct %in% c(0, 1), 0, 1),
                  vjust=ifelse(pct==1, 1, 0)
                  ), 
                  color="red", 
                  size=2.5
                  ) +
        geom_point(alpha=0.1, size=0.5) + 
        labObj

    if(length(ecdfVars)>1) p1 <- p1 + facet_wrap(~name, nrow=1)
    
    p1
    
}

keyLabs <- labs(title="ECDF over 100 hands", 
                subtitle="10,000 simulations", 
                y=NULL, 
                x=NULL
                )

# No percent-values plotted, no labels
getSimECDF(tcpSimSummary, ecdfVars=c("minMain"))

getSimECDF(tcpSimSummary, ecdfVars=c("minMain", "sumMain"))

# Percent-values plotted, standard labels
getSimECDF(tcpSimSummary, ecdfVars=c("minMain"), percPoint=percKey, labObj=keyLabs)

getSimECDF(tcpSimSummary, 
           ecdfVars=c("minMain", "sumMain"), 
           percPoint=percKey, 
           labObj=keyLabs
           )

# Customized labels
cLab <- keyLabs
cLab$x <- "Main bet minimum (units)"
getSimECDF(tcpSimSummary, 
           ecdfVars=c("minMain"), 
           percPoint=percKey, 
           labObj=cLab
           )

cLab$x <- "Main bet (units)"
getSimECDF(tcpSimSummary, 
           ecdfVars=c("minMain", "sumMain"), 
           percPoint=percKey, 
           labObj=cLab
           )

The ECDF process is run for bonus bets:

# Customized labels
cLab$x <- "Bonus bet (units)"
getSimECDF(tcpSimSummary, 
           ecdfVars=c("minBonus", "sumBonus"), 
           percPoint=percKey, 
           labObj=cLab
           )

The ECDF process is run for overall outcome:

# Customized labels
cLab$x <- "Overall (units)\n2*Main + Bonus"
getSimECDF(tcpSimSummary, 
           ecdfVars=c("minOverall", "sumOverall"), 
           percPoint=percKey, 
           labObj=cLab
           )

Relationships between minimum and final are explored:

lmSim <- lm(sumOverall ~ minOverall, data=tcpSimSummary)
summary(lmSim)
## 
## Call:
## lm(formula = sumOverall ~ minOverall, data = tcpSimSummary)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64.99 -21.27  -7.12  13.85 207.10 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 48.964429   0.532423   91.97   <2e-16 ***
## minOverall   1.381617   0.009622  143.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.51 on 9998 degrees of freedom
## Multiple R-squared:  0.6734, Adjusted R-squared:  0.6734 
## F-statistic: 2.062e+04 on 1 and 9998 DF,  p-value: < 2.2e-16
tcpSimSummary %>%
    select(simNumber, minOverall, sumOverall) %>%
    count(minOverall, sumOverall) %>%
    ggplot(aes(x=minOverall, y=sumOverall)) + 
    geom_point(aes(size=n), alpha=0.1) + 
    labs(x="Overall minimum", 
         y="Overall final", 
         title="Final vs. minimum (overall)"
         ) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    geom_abline(slope=1, intercept=0, color="red", lty=2) + 
    geom_text(data=~summarize(., 
                              minOverall=min(minOverall), 
                              sumOverall=max(sumOverall)
                              ), 
              aes(label=paste0("y = ", 
                               round(coef(lmSim)[1], 1), 
                               " + ", 
                               round(coef(lmSim)[2], 3), 
                               "*x\n", 
                               "R2: ", 
                               round(summary(lmSim)$r.squared, 3)
                               )
                  ), 
              hjust=0, 
              vjust=1
              )
## `geom_smooth()` using formula = 'y ~ x'

The process is converted to functional form:

finalVsMin <- function(df, y, x, txt="") {
    
    # FUNCTION ARGUMENTS:
    # df: data frame with simulations
    # y: dependent variable
    # x: independent variable
    # txt: descriptor for chart labeling

    tmpLM <- lm(get(y) ~ get(x), data=df)
    summary(tmpLM)
    
    p1 <- df %>%
        group_by(across(all_of(c(y, x)))) %>%
        summarize(n=n(), .groups="drop") %>%
        ggplot(aes(x=.data[[x]], y=.data[[y]])) + 
        geom_point(aes(size=n), alpha=0.1) + 
        labs(x=paste0(txt, " Minimum"), 
             y=paste0(txt, " Final"), 
             title=paste0("Final vs. Minimum", 
                          ifelse(txt=="", "", paste0(" (", txt, ")"))
                          )
             ) + 
        geom_smooth(method="lm", aes(weight=n)) + 
        geom_abline(slope=1, intercept=0, color="red", lty=2) + 
        geom_text(data=~summarize(., 
                                  minX=min(get(x)), 
                                  maxY=max(get(y))
                                  ), 
                  aes(x=minX, 
                      y=maxY, 
                      label=paste0("y = ", 
                                   round(coef(tmpLM)[1], 1), 
                                   " + ", 
                                   round(coef(tmpLM)[2], 3), 
                                   "*x\n", 
                                   "R2: ", 
                                   round(summary(tmpLM)$r.squared, 3)
                                   )
                      ), 
                  hjust=0, 
                  vjust=1
                  )

    print(p1)
    
}

finalVsMin(tcpSimSummary, y="sumOverall", x="minOverall", txt="Overall")
## `geom_smooth()` using formula = 'y ~ x'

The process is run for the main bet and bonus bet:

finalVsMin(tcpSimSummary, y="sumMain", x="minMain", txt="Main")
## `geom_smooth()` using formula = 'y ~ x'

finalVsMin(tcpSimSummary, y="sumBonus", x="minBonus", txt="Bonus")
## `geom_smooth()` using formula = 'y ~ x'

The function is run for comparing end results:

finalVsMin(tcpSimSummary, y="sumOverall", x="sumMain", txt="Main")
## `geom_smooth()` using formula = 'y ~ x'

finalVsMin(tcpSimSummary, y="sumOverall", x="sumBonus", txt="Bonus")
## `geom_smooth()` using formula = 'y ~ x'

The function can be updated for better control of title and axis labels:

finalVsMin <- function(df, y, x, txt="", xTxt="Minimum", yTxt="Final") {
    
    # FUNCTION ARGUMENTS:
    # df: data frame with simulations
    # y: dependent variable
    # x: independent variable
    # txt: descriptor for chart labeling
    # xTxt: x-axis descriptor for chart labeling
    # yTxt: y-axis descriptor for chart labeling

    # Create text labels
    titleTxt <- paste0(yTxt, 
                       " vs. ", 
                       xTxt, 
                       ifelse(txt=="", "", paste0(" (", txt, ")"))
                       )
    xTxt <- paste0(ifelse(txt=="", "", paste0(txt, " ")), xTxt)
    yTxt <- paste0(ifelse(txt=="", "", paste0(txt, " ")), yTxt)
    
    
    # Run linear regression
    tmpLM <- lm(get(y) ~ get(x), data=df)
    
    p1 <- df %>%
        group_by(across(all_of(c(y, x)))) %>%
        summarize(n=n(), .groups="drop") %>%
        ggplot(aes(x=.data[[x]], y=.data[[y]])) + 
        geom_point(aes(size=n), alpha=0.1) + 
        labs(x=xTxt, y=yTxt, title=titleTxt) + 
        geom_smooth(method="lm", aes(weight=n)) + 
        geom_abline(slope=1, intercept=0, color="red", lty=2) + 
        geom_text(data=~summarize(., 
                                  minX=min(get(x)), 
                                  maxY=max(get(y))
                                  ), 
                  aes(x=minX, 
                      y=maxY, 
                      label=paste0("y = ", 
                                   round(coef(tmpLM)[1], 1), 
                                   " + ", 
                                   round(coef(tmpLM)[2], 3), 
                                   "*x\n", 
                                   "R2: ", 
                                   round(summary(tmpLM)$r.squared, 3)
                                   )
                      ), 
                  hjust=0, 
                  vjust=1
                  )

    print(p1)
    
}

finalVsMin(tcpSimSummary, y="sumOverall", x="minOverall", txt="Overall")
## `geom_smooth()` using formula = 'y ~ x'

finalVsMin(tcpSimSummary, 
           y="sumOverall", 
           x="sumMain", 
           txt="Final", 
           xTxt="Main", 
           yTxt="Overall"
           )
## `geom_smooth()` using formula = 'y ~ x'

finalVsMin(tcpSimSummary, 
           y="sumOverall", 
           x="sumBonus", 
           txt="Final", 
           xTxt="Bonus", 
           yTxt="Overall"
           )
## `geom_smooth()` using formula = 'y ~ x'

The results of regressions are compared using bonus, given Overall = 2*Main + Bonus:

# Regression of main vs. bonus
lmMainBonus <- lm(sumMain ~ sumBonus, data=tcpSimSummary)
summary(lmMainBonus)
## 
## Call:
## lm(formula = sumMain ~ sumBonus, data = tcpSimSummary)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.894  -9.632  -0.176   9.637  51.953 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.272799   0.147762  -8.614   <2e-16 ***
## sumBonus     0.282307   0.004984  56.638   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.34 on 9998 degrees of freedom
## Multiple R-squared:  0.2429, Adjusted R-squared:  0.2428 
## F-statistic:  3208 on 1 and 9998 DF,  p-value: < 2.2e-16
# Expected coefficients for Overall ~ Bonus
cat("\nExpected coefficients for Overall ~ Bonus:\n", 
    "Intercept: ", round(2*coef(lmMainBonus)[1], 3), 
    "\nSlope: ", 1 + round(2*coef(lmMainBonus)[2], 3),
    "\n", 
    sep=""
    )
## 
## Expected coefficients for Overall ~ Bonus:
## Intercept: -2.546
## Slope: 1.565
# Actual coefficients for Overall ~ Bonus
lmOverallBonus <- lm(sumOverall ~ sumBonus, data=tcpSimSummary)
cat("\nActual coefficients for Overall ~ Bonus:\n", 
    "Intercept: ", round(coef(lmOverallBonus)[1], 3), 
    "\nSlope: ", round(coef(lmOverallBonus)[2], 3),
    "\nR-squared: ", round(summary(lmOverallBonus)$r.squared, 3),
    "\n", 
    sep=""
    )
## 
## Actual coefficients for Overall ~ Bonus:
## Intercept: -2.546
## Slope: 1.565
## R-squared: 0.711

The results of regressions are compared using main, given Overall = 2*Main + Bonus:

# Regression of main vs. bonus
lmBonusMain <- lm(sumBonus ~ sumMain, data=tcpSimSummary)
summary(lmBonusMain)
## 
## Call:
## lm(formula = sumBonus ~ sumMain, data = tcpSimSummary)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -78.679 -17.796  -3.819  14.280 145.926 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.29713    0.25533  -16.83   <2e-16 ***
## sumMain      0.86044    0.01519   56.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.04 on 9998 degrees of freedom
## Multiple R-squared:  0.2429, Adjusted R-squared:  0.2428 
## F-statistic:  3208 on 1 and 9998 DF,  p-value: < 2.2e-16
# Expected coefficients for Overall ~ Main
cat("\nExpected coefficients for Overall ~ Main:\n", 
    "Intercept: ", round(coef(lmBonusMain)[1], 3), 
    "\nSlope: ", 2 + round(coef(lmBonusMain)[2], 3),
    "\n", 
    sep=""
    )
## 
## Expected coefficients for Overall ~ Main:
## Intercept: -4.297
## Slope: 2.86
# Actual coefficients for Overall ~ Main
lmOverallMain <- lm(sumOverall ~ sumMain, data=tcpSimSummary)
cat("\nActual coefficients for Overall ~ Main:\n", 
    "Intercept: ", round(coef(lmOverallMain)[1], 3), 
    "\nSlope: ", round(coef(lmOverallMain)[2], 3),
    "\nR-squared: ", round(summary(lmOverallMain)$r.squared, 3),
    "\n", 
    sep=""
    )
## 
## Actual coefficients for Overall ~ Main:
## Intercept: -4.297
## Slope: 2.86
## R-squared: 0.78

The results of regressions on minimum values are compared using bonus, given Overall = 2*Main + Bonus:

# Regression of main vs. bonus
lmMinMainBonus <- lm(minMain ~ minBonus, data=tcpSimSummary)
summary(lmMinMainBonus)
## 
## Call:
## lm(formula = minMain ~ minBonus, data = tcpSimSummary)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.770  -5.481   1.212   6.535  25.586 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.788139   0.177251  -32.66   <2e-16 ***
## minBonus     0.355966   0.006591   54.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.244 on 9998 degrees of freedom
## Multiple R-squared:  0.2258, Adjusted R-squared:  0.2258 
## F-statistic:  2917 on 1 and 9998 DF,  p-value: < 2.2e-16
# (Rough) expected coefficients for Overall ~ Bonus
cat("\nExpected (rough) coefficients for minimum Overall ~ Bonus:\n", 
    "Intercept: ", round(2*coef(lmMinMainBonus)[1], 3), 
    "\nSlope: ", 1 + round(2*coef(lmMinMainBonus)[2], 3),
    "\n", 
    sep=""
    )
## 
## Expected (rough) coefficients for minimum Overall ~ Bonus:
## Intercept: -11.576
## Slope: 1.712
# Actual coefficients for Overall ~ Bonus
lmMinOverallBonus <- lm(minOverall ~ minBonus, data=tcpSimSummary)
cat("\nActual coefficients for minimum Overall ~ Bonus:\n", 
    "Intercept: ", round(coef(lmMinOverallBonus)[1], 3), 
    "\nSlope: ", round(coef(lmMinOverallBonus)[2], 3),
    "\nR-squared: ", round(summary(lmMinOverallBonus)$r.squared, 3),
    "\n", 
    sep=""
    )
## 
## Actual coefficients for minimum Overall ~ Bonus:
## Intercept: -5.697
## Slope: 1.728
## R-squared: 0.584
# Actual regression for minima
lm(minOverall ~ minMain + minBonus, data=tcpSimSummary) %>%
    summary()
## 
## Call:
## lm(formula = minOverall ~ minMain + minBonus, data = tcpSimSummary)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.514 -4.256 -1.866  2.426 41.657 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.535535   0.122050   53.55   <2e-16 ***
## minMain     2.113371   0.006546  322.84   <2e-16 ***
## minBonus    0.975744   0.004903  199.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.05 on 9997 degrees of freedom
## Multiple R-squared:  0.9636, Adjusted R-squared:  0.9636 
## F-statistic: 1.323e+05 on 2 and 9997 DF,  p-value: < 2.2e-16

Timing of first hitting the overall minimum and maximum by simulation is calculated:

# Pull first occurrence of minima and maxima
tcpSimWhich <- allTCPSim %>%
    mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
    group_by(simNumber) %>%
    mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
    summarize(across(c(ovCum, ovMain, ovBonus), 
                     .fns=list(whichmin=function(x) which.min(x), 
                               whichmax=function(x) which.max(x)
                               )
                     )
              )
tcpSimWhich
## # A tibble: 10,000 × 7
##    simNumber ovCum_whichmin ovCum_whichmax ovMain_whichmin ovMain_whichmax
##        <int>          <int>          <int>           <int>           <int>
##  1         1              7             61               7              61
##  2         2             72            100              72              40
##  3         3             92             23              92              23
##  4         4             43             85              43              85
##  5         5             94             22              75               9
##  6         6             82              2              82               2
##  7         7             65              5              65               5
##  8         8             99             55              99              55
##  9         9            100              8             100              61
## 10        10            100              6             100              63
## # ℹ 9,990 more rows
## # ℹ 2 more variables: ovBonus_whichmin <int>, ovBonus_whichmax <int>

Distributions for hand where maximum is first achieved are plotted:

tcpSimWhich %>%
    select(simNumber, ends_with("max")) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    mutate(useName=str_replace_all(name, pattern="_whichmax", replacement="")) %>%
    count(useName, value) %>%
    ggplot(aes(x=value, y=n)) + 
    geom_line(aes(group=useName, color=useName)) + 
    labs(title="Hand number where overall maximum is first obtained", 
         subtitle="10,000 simulations, each of 100 hands", 
         y="# Simulations\n(log-10 scale)", 
         x="Hand where overall maximum is first achieved"
         ) + 
    scale_color_discrete("Bet Type") + 
    scale_y_log10() + 
    geom_text(data=~filter(., value %in% c(1, 100)), 
              aes(label=n, color=useName, hjust=ifelse(value==1, 1, 0)), 
              size=2.5
              )

As expected given negative per-hand expectation, maximum is typically reached early in the simulation

Distributions for hand where minimum is first achieved are plotted:

tcpSimWhich %>%
    select(simNumber, ends_with("min")) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    mutate(useName=str_replace_all(name, pattern="_whichmin", replacement="")) %>%
    count(useName, value) %>%
    ggplot(aes(x=value, y=n)) + 
    geom_line(aes(group=useName, color=useName)) + 
    labs(title="Hand number where overall minimum is first obtained", 
         subtitle="10,000 simulations, each of 100 hands", 
         y="# Simulations\n(log-10 scale)", 
         x="Hand where overall minimum is first achieved"
         ) + 
    scale_color_discrete("Bet Type") + 
    scale_y_log10() + 
    geom_text(data=~filter(., value %in% c(1, 100)), 
              aes(label=n, color=useName, hjust=ifelse(value==1, 1, 0)), 
              size=2.5
              )

As expected given negative per-hand expectation, minimum is typically reached late in the simulation

Time for reaching minimum and maximum is compared (overall):

tcpSimWhich %>%
    select(simNumber, starts_with("ovCum")) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    mutate(useName=str_replace_all(name, pattern="_which.*", replacement=""), 
           metric=str_replace_all(name, pattern=".*_which", replacement="")
           ) %>%
    pivot_wider(id_cols=c("simNumber", "useName"), 
                names_from="metric", 
                values_from="value"
                ) %>%
    count(useName, min, max) %>%
    ggplot(aes(x=min, y=max)) + 
    geom_point(aes(size=n), alpha=0.25) + 
    labs(title="Hand number where overall minimum and maximum is first obtained", 
         subtitle="10,000 simulations, each of 100 hands", 
         y="Hand where overall maximum is first achieved", 
         x="Hand where overall minimum is first achieved", 
         caption="Red point is weighted mean"
         ) + 
    scale_size_continuous("# Sims") + 
    geom_smooth(method="lm", aes(weight=n)) + 
    geom_point(data=~summarize(., across(c(min, max), 
                                         .fns=function(x) sum(x*n)/sum(n)
                                         )
                               ), 
               size=5, 
               color="red"
               )
## `geom_smooth()` using formula = 'y ~ x'

Time for reaching minimum and maximum is compared (all):

tcpSimWhich %>%
    select(simNumber, starts_with("ov")) %>%
    pivot_longer(cols=-c(simNumber)) %>%
    mutate(useName=str_replace_all(name, pattern="_which.*", replacement=""), 
           metric=str_replace_all(name, pattern=".*_which", replacement="")
           ) %>%
    pivot_wider(id_cols=c("simNumber", "useName"), 
                names_from="metric", 
                values_from="value"
                ) %>%
    count(useName, min, max) %>%
    ggplot(aes(x=min, y=max)) + 
    geom_point(aes(size=n), alpha=0.25) + 
    labs(title="Hand number where minimum and maximum is first obtained", 
         subtitle="10,000 simulations, each of 100 hands", 
         y="Hand where maximum is first achieved", 
         x="Hand where minimum is first achieved", 
         caption="Red point is weighted mean"
         ) + 
    scale_size_continuous("# Sims") + 
    geom_smooth(method="lm", aes(weight=n)) + 
    facet_wrap(~useName) +
    geom_point(data=~summarize(group_by(., useName), 
                               across(c(min, max), .fns=function(x) sum(x*n)/sum(n))
                               ), 
               size=5, 
               color="red"
               )
## `geom_smooth()` using formula = 'y ~ x'

Data for being at an all-time low/high is explored:

# Get new minima and maxima
tcpSimAllTime <- allTCPSim %>%
    mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
    group_by(simNumber) %>%
    mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
    select(simNumber, ovCum, ovMain, ovBonus) %>%
    mutate(hn=row_number()) %>%
    ungroup() %>%
    pivot_longer(cols=-c(simNumber, hn)) %>%
    arrange(simNumber, name, hn) %>%
    group_by(simNumber, name) %>%
    mutate(cmax=cummax(value), 
           cmin=cummin(value),
           newmax=(cmax==value & (hn==1 | cmax>lag(cmax))), 
           newmin=(cmin==value & (hn==1 | cmin<lag(cmin))),
           tiemax=(cmax==value) & !newmax, 
           tiemin=(cmax==value) & !newmin
           ) %>%
    ungroup()
tcpSimAllTime
## # A tibble: 3,000,000 × 10
##    simNumber    hn name    value  cmax  cmin newmax newmin tiemax tiemin
##        <int> <int> <chr>   <dbl> <dbl> <dbl> <lgl>  <lgl>  <lgl>  <lgl> 
##  1         1     1 ovBonus    -1    -1    -1 TRUE   TRUE   FALSE  FALSE 
##  2         1     2 ovBonus    -2    -1    -2 FALSE  TRUE   FALSE  FALSE 
##  3         1     3 ovBonus    -3    -1    -3 FALSE  TRUE   FALSE  FALSE 
##  4         1     4 ovBonus    -2    -1    -3 FALSE  FALSE  FALSE  FALSE 
##  5         1     5 ovBonus    -3    -1    -3 FALSE  FALSE  FALSE  FALSE 
##  6         1     6 ovBonus    -4    -1    -4 FALSE  TRUE   FALSE  FALSE 
##  7         1     7 ovBonus    -5    -1    -5 FALSE  TRUE   FALSE  FALSE 
##  8         1     8 ovBonus    -4    -1    -5 FALSE  FALSE  FALSE  FALSE 
##  9         1     9 ovBonus    -5    -1    -5 FALSE  FALSE  FALSE  FALSE 
## 10         1    10 ovBonus    -4    -1    -5 FALSE  FALSE  FALSE  FALSE 
## # ℹ 2,999,990 more rows

Probability for being at an all-time high for overall is plotted:

# Probability for being at all-time high overall
tcpSimAllTime %>% 
    filter(name=="ovCum") %>% 
    count(hn, newmax, tiemax) %>% 
    group_by(hn) %>% 
    mutate(pct=n/sum(n), 
           type=case_when(newmax~"allHigh", tiemax~"tieHigh", TRUE~"none")
           ) %>% 
    ungroup() %>% 
    pivot_wider(id_cols="hn", names_from="type", values_from="pct", values_fill=0) %>%
    pivot_longer(cols=-hn) %>% 
    filter(hn>1) %>% 
    ggplot(aes(x=hn, y=value)) +
    geom_text(data=~filter(., hn==100), 
              aes(label=round(value, 3)), 
              hjust=0, 
              size=2.5
              ) +
    geom_line(aes(group=name, 
                  color=c("allHigh"="3. New high", 
                          "tieHigh"="2. Ties previous high", 
                          "none"="1. Below previous high"
                          )[name]
                  )
              ) + 
    labs(x="Hand Number", 
         y="Proportion of simulations", 
         title="Likelihood of new overall maximum by hand number", 
         caption="Hand 1 not plotted (always a new high)"
         ) + 
    scale_color_discrete(NULL) + 
    theme(legend.position = "right")

Probability for being at an all-time low for overall is plotted:

# Probability for being at all-time low overall
tcpSimAllTime %>% 
    filter(name=="ovCum") %>% 
    count(hn, newmin, tiemin) %>% 
    group_by(hn) %>% 
    mutate(pct=n/sum(n), 
           type=case_when(newmin~"allLow", tiemin~"tieLow", TRUE~"none")
           ) %>% 
    ungroup() %>% 
    pivot_wider(id_cols="hn", names_from="type", values_from="pct", values_fill=0) %>%
    pivot_longer(cols=-hn) %>% 
    filter(hn>1) %>% 
    ggplot(aes(x=hn, y=value)) + 
    geom_text(data=~filter(., hn==100), 
              aes(label=round(value, 3)), 
              hjust=0, 
              size=2.5
              ) +
    geom_line(aes(group=name, 
                  color=c("allLow"="3. New low", 
                          "tieLow"="2. Ties previous low", 
                          "none"="1. Above previous low"
                          )[name]
                  )
              ) + 
    labs(x="Hand Number", 
         y="Proportion of simulations", 
         title="Likelihood of new overall minimum by hand number", 
         caption="Hand 1 not plotted (always a new low)"
         ) + 
    scale_color_discrete(NULL) + 
    theme(legend.position = "right")

As expected with negative expectation, it is more common to reach new lows than new highs as the simulation progresses

Probability for being at an all-time high for all three metrics is plotted:

# Probability for being at all-time high
tcpSimAllTime %>%
    count(name, hn, newmax, tiemax) %>% 
    group_by(name, hn) %>% 
    mutate(pct=n/sum(n), 
           type=case_when(newmax~"allHigh", tiemax~"tieHigh", TRUE~"none")
           ) %>% 
    ungroup() %>% 
    rename("metric"="name") %>%
    pivot_wider(id_cols=c("metric", "hn"), 
                names_from="type", 
                values_from="pct", 
                values_fill=0
                ) %>%
    pivot_longer(cols=-c(metric, hn)) %>% 
    filter(hn>1) %>% 
    ggplot(aes(x=hn, y=value)) +
    geom_text(data=~filter(., hn==100), 
              aes(label=round(value, 3)), 
              hjust=0, 
              size=2.5
              ) +
    geom_line(aes(group=name, 
                  color=c("allHigh"="3. New high", 
                          "tieHigh"="2. Ties previous high", 
                          "none"="1. Below previous high"
                          )[name]
                  )
              ) + 
    labs(x="Hand Number", 
         y="Proportion of simulations", 
         title="Likelihood of new overall maximum by hand number", 
         caption="Hand 1 not plotted (always a new high)"
         ) + 
    scale_color_discrete(NULL) + 
    theme(legend.position = "right") + 
    facet_wrap(~metric) + 
    theme(legend.position="bottom")